if ( !require("pacman", quietly = TRUE) )
install.packages("pacman")
pacman::p_load(
tidyverse, #Used for basic data handling and visualization.
RColorBrewer, #Color palettes for data visualization.
gridExtra, #Used to arrange multiple ggplots in a grid.
grid, #Used to arrange multiple ggplots in a grid.
overviewR, #Used to assess missing data.
rnaturalearth, #Used to extract geographical data to create maps.
sf, #Used together with the prior package to create map.
plotly, #Used together with prior two packages to create map.
table1, #Used to add labels to variables and to create table of descriptive characteristics of patients.
flextable, #Used to export tables.
officer, #Used to export tables.
mgcv, #Used to model non-linear relationships with a general additive model.
ggmosaic, #Used to create mosaic plots.
car, #Used to visualize distribution of continuous variables (stacked Q-Q plots).
simpleboot, #Used to calculate mean atelectasis coverage and confidence intervals trhough bootstrapping.
boot, #Used to calculate mean atelectasis coverage and confidence intervals trhough bootstrapping.
dagitty, #Used in conjunction with https://www.dagitty.net/ to create directed acyclic graph to inform statistical modelling.
lavaan, #Used to create correlation matrix to assess conditional independencies.
broom, #Used to exponentiate coefficients of regression models.
sandwich, #Used to calculate robust standard errors for prevalence ratios.
ordinal, #Used to model ordinal outcome (atelectasis percent) and to test proportional odds assumptions.
VGAM, #Used to model partial proportional odds model.
gt, #Used to present a summary of the results of regression models.
gtsummary, #Used to create table to summarize regression models.
gratia #Used together with gglopt2 to create smooth partial effects plot from gam models.
)Set seed (needed for reproducibility of bootstrapping) as the current year 2023:
# Credits for this chunk of code: Alex Bossers, Utrecht University (a.bossers@uu.nl)
# remove clutter
session <- sessionInfo()
session$BLAS <- NULL; session$LAPACK <- NULL; session$loadedOnly <- NULL;
# write log file
writeLines(capture.output( print( session, locale=FALSE ) ),
paste0(lubridate::today(),"_sessionInfo_prevalencia_atelectasias.txt") )
# also show in notebook.
session ## R version 4.3.2 (2023-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 11 x64 (build 22621)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=Spanish_Mexico.utf8 LC_CTYPE=Spanish_Mexico.utf8
## [3] LC_MONETARY=Spanish_Mexico.utf8 LC_NUMERIC=C
## [5] LC_TIME=Spanish_Mexico.utf8
##
## time zone: Europe/Berlin
## tzcode source: internal
##
## attached base packages:
## [1] splines stats4 grid stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] gratia_0.8.1 gtsummary_1.7.2 gt_0.10.0
## [4] VGAM_1.1-9 ordinal_2022.11-16 sandwich_3.0-2
## [7] broom_1.0.5 lavaan_0.6-16 dagitty_0.3-1
## [10] boot_1.3-28.1 simpleboot_1.1-7 car_3.1-2
## [13] carData_3.0-5 ggmosaic_0.3.3 mgcv_1.9-0
## [16] nlme_3.1-163 officer_0.6.3 flextable_0.9.4
## [19] table1_1.4.3 plotly_4.10.3 sf_1.0-14
## [22] rnaturalearth_0.3.4 overviewR_0.0.13 gridExtra_2.3
## [25] RColorBrewer_1.1-3 lubridate_1.9.3 forcats_1.0.0
## [28] stringr_1.5.0 dplyr_1.1.3 purrr_1.0.2
## [31] readr_2.1.4 tidyr_1.3.0 tibble_3.2.1
## [34] ggplot2_3.4.4 tidyverse_2.0.0 pacman_0.5.1
Set working directory and create sub-folders
figfolder <- "01_output_figures"
tabfolder <- "01_output_tables"
psfolder <- "02_ps_subsets"
dir.create( figfolder, showWarnings=FALSE )
dir.create( tabfolder, showWarnings=FALSE )
dir.create( psfolder, showWarnings=FALSE )Load dataset
## 'data.frame': 243 obs. of 42 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ age : int 41 50 45 35 28 52 46 56 59 57 ...
## $ sex : int 1 1 1 1 1 1 1 1 2 1 ...
## $ weight : num 116 116 146 101 141 ...
## $ height : num 1.62 1.71 1.67 1.48 1.72 1.7 1.75 1.63 1.78 1.62 ...
## $ BMI : num 44.1 39.5 52.2 46.2 47.7 ...
## $ type_obesity : int 3 2 3 3 3 1 2 3 3 1 ...
## $ ARISCAT : int 23 26 23 23 39 26 23 26 26 26 ...
## $ ARISCAT_group : int 1 2 1 1 2 2 1 2 2 2 ...
## $ ASA : int 2 2 3 2 2 2 2 2 2 NA ...
## $ spo2_VPO : int 97 94 93 92 90 96 96 96 97 93 ...
## $ surgical_procedure : int 2 1 1 1 1 1 1 1 1 4 ...
## $ CORADS : int 1 1 1 1 1 1 1 1 1 1 ...
## $ atelectasis : int 0 1 1 1 1 0 0 0 0 1 ...
## $ atelectasis_location: int NA 1 1 1 2 NA NA NA NA 1 ...
## $ atelectasis_percent : num 0 2.5 7.5 15 7.5 0 0 0 0 5 ...
## $ hb : num 13.9 14.2 14.2 14.8 14.1 13.8 16.4 14.1 14.2 13.9 ...
## $ hct : num 41.4 41.8 41.7 43.6 42.2 39.9 46.2 41.9 42.2 39.9 ...
## $ leu : num 11.6 5.4 8.6 9.4 7.7 9.8 5.9 10.5 9.5 6.1 ...
## $ neu_percent : int 60 62 58 63 68 60 59 67 62 53 ...
## $ neu_absolute : num 6.96 3.35 4.99 5.92 5.24 ...
## $ linf_percent : int 39 33 38 35 31 38 38 31 36 44 ...
## $ linf_absolute : num 4.52 1.78 3.27 3.29 2.39 ...
## $ mon_percent : int 1 4 3 1 1 2 2 2 1 2 ...
## $ mon_absolute : num 4.52 1.78 3.27 3.29 2.39 ...
## $ platelets : int 364 217 218 318 332 392 268 387 322 225 ...
## $ glucose : int 74 89 74 76 80 90 121 84 80 98 ...
## $ urea : int 21 26 27 14 18 15 28 22 21 32 ...
## $ creatinine : num 0.88 0.82 0.64 0.74 0.72 0.6 0.77 0.6 0.71 0.59 ...
## $ rapid_covid_test : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PCR_covid : int NA NA NA NA NA NA NA NA NA NA ...
## $ surgery_performed : int 1 1 1 1 1 1 1 1 1 1 ...
## $ state_residence : chr "Texas" "Texas" "Texas" "Arkansas" ...
## $ altitude : int 519 519 519 198 885 1861 519 31 519 519 ...
## $ hypertension : int 0 0 0 0 0 0 0 1 1 0 ...
## $ diabetes : int 0 0 0 0 0 0 0 0 1 0 ...
## $ sleep_apnea : int 0 0 0 0 1 0 0 0 0 0 ...
## $ hypothyroidism : int 1 0 1 0 0 0 0 0 0 0 ...
## $ dyslipidemia : int 0 0 0 0 0 0 0 1 0 0 ...
## $ antidepressant_use : int 0 1 0 0 1 1 0 0 0 0 ...
## $ prior_covid19 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ other_comorb : int 0 1 1 0 1 0 1 1 1 0 ...
Recode variables
table1::label(data$age) <- "Age"
units(data$age) <- "years"
table1::label(data$sex) <- "Sex"
data$sex <- factor(data$sex,
levels=c(1,2),
labels=c("Woman", "Man")
)
table1::label(data$weight) <- "Weight"
units(data$weight) <- "kilograms (kg)"
table1::label(data$height) <- "Height"
units(data$height) <- "meters (m)"
table1::label(data$type_obesity) <- "Obesity class"
data$type_obesity <- factor(data$type_obesity,
levels=c(1,2,3),
labels=c("Class 1 Obesity",
"Class 2 Obesity",
"Class 3 Obesity")
)
table1::label(data$ARISCAT) <- "ARISCAT"
units(data$ARISCAT) <- "score"
table1::label(data$ARISCAT_group) <- "ARISCAT risk group"
data$ARISCAT_group <- factor(data$ARISCAT_group,
levels=c(1,2,3),
labels=c("Low Risk",
"Intermediate Risk",
"High Risk")
)
table1::label(data$ASA) <- "ASA physical status"
data$ASA <- factor(data$ASA,
levels=c(1,2,3),
labels=c("ASA 1", "ASA 2", "ASA 3")
)
table1::label(data$spo2_VPO) <- "Oxygen saturation (SpO2)"
units(data$spo2_VPO) <- "%"
table1::label(data$surgical_procedure) <- "Surgical procedure"
data$surgical_procedure <- factor(data$surgical_procedure,
levels=c(1,2,3,4),
labels=c("SG", # 'SG' = Sleeve Gastrectomy
"RYGB", # 'RYGB' = Roux-en-Y gastric bypass
"OAGB", # 'OAGB' = One anastomosis gastric bypass
"LBGS") # 'LBGS' = lap-band to sleeve
)
table1::label(data$CORADS) <- "CO-RADS"
data$CORADS <- factor(data$CORADS,
levels=c(1,2,3,4),
labels=c("CO-RADS 1",
"CO-RADS 2",
"CO-RADS 3",
"CO-RADS 4"),
ordered = TRUE
)
table1::label(data$atelectasis) <- "Atelectasis"
data$atelectasis <- factor(data$atelectasis,
levels=c(0,1),
labels=c("No", "Yes")
) %>%
relevel("Yes","No") #Relevel done for convenience when visualizing 2x2 tables.
table1::label(data$atelectasis_location) <- "Atelectasis location"
data$atelectasis_location <- factor(data$atelectasis_location,
levels=c(1,2),
labels=c("Unilateral", "Bilateral")
)
table1::label(data$atelectasis_percent) <- "Percentage of atelectasis"
units(data$atelectasis_percent) <- "%"
table1::label(data$hb) <- "Hemoglobin"
units(data$hb) <- "g/dL"
table1::label(data$hct) <- "Hematocrit"
units(data$hct) <- "%"
table1::label(data$leu) <- "WBC count"
units(data$leu) <- "10³/µL"
table1::label(data$neu_percent) <- "Neutrophils (percent)"
units(data$neu_percent) <- "%"
table1::label(data$neu_absolute) <- "Neutrophils (absolute)"
units(data$neu_absolute) <- "10³/µL"
table1::label(data$linf_percent) <- "Lymphocytes (percent)"
units(data$linf_percent) <- "%"
table1::label(data$linf_absolute) <- "Lymphocytes (absolute)"
units(data$linf_absolute) <- "10³/µL"
table1::label(data$mon_percent) <- "Monocytes (percent)"
units(data$mon_percent) <- "%"
table1::label(data$mon_absolute) <- "Monocytes (absolute)"
units(data$mon_absolute) <- "10³/µL"
table1::label(data$platelets) <- "Platelets"
units(data$platelets) <- "cells/µL"
table1::label(data$glucose) <- "Glucose"
units(data$glucose) <- "mg/dL"
table1::label(data$urea) <- "Urea"
units(data$urea) <- "mg/dL"
table1::label(data$creatinine) <- "Creatinine"
units(data$creatinine) <- "mg/dL"
data$rapid_covid_test <- as.factor(data$rapid_covid_test) %>%
fct_recode(negative = "0")
data$PCR_covid <- as.factor(data$PCR_covid) %>% fct_recode(negative = "0")
table1::label(data$state_residence) <- "State of residence"
data$state_residence <- as.factor(data$state_residence)
data$altitude <- as.numeric(data$altitude)
table1::label(data$altitude) <- "Mean altitude"
units(data$altitude) <- "meters"
table1::label(data$surgery_performed) <- "Surgery performed"
data$surgery_performed <- factor(data$surgery_performed,
levels=c(0,1),
labels=c("No", "Yes")
)
table1::label(data$hypertension) <- "Hypertension"
data$hypertension <- factor(data$hypertension,
levels=c(0,1),
labels=c("No", "Yes")
)
table1::label(data$diabetes) <- "Diabetes"
data$diabetes <- factor(data$diabetes,
levels=c(0,1),
labels=c("No", "Yes")
)
table1::label(data$sleep_apnea) <- "Obstructive sleep apnea"
data$sleep_apnea <- factor(data$sleep_apnea,
levels=c(0,1),
labels=c("No", "Yes")
)
table1::label(data$hypothyroidism) <- "Hypothyroidism"
data$hypothyroidism <- factor(data$hypothyroidism,
levels=c(0,1),
labels=c("No", "Yes")
)
table1::label(data$dyslipidemia) <- "Dyslipidemia"
data$dyslipidemia <- factor(data$dyslipidemia,
levels=c(0,1),
labels=c("No", "Yes")
)
table1::label(data$antidepressant_use) <- "Antidepressants use"
data$antidepressant_use <- factor(data$antidepressant_use,
levels=c(0,1),
labels=c("No", "Yes")
)
table1::label(data$prior_covid19) <- "Prior COVID-19"
data$prior_covid19 <- factor(data$prior_covid19,
levels=c(0,1),
labels=c("No", "Yes")
)
table1::label(data$other_comorb) <- "Other comorbidities"
data$other_comorb <- factor(data$other_comorb,
levels=c(0,1),
labels=c("No", "Yes")
)
str(data)## ID age sex weight height
## Min. : 1.0 Min. :20.00 Woman:221 Min. : 73.8 Min. :1.480
## 1st Qu.: 61.5 1st Qu.:33.00 Man : 22 1st Qu.: 97.6 1st Qu.:1.620
## Median :122.0 Median :40.00 Median :111.2 Median :1.660
## Mean :122.0 Mean :40.37 Mean :115.9 Mean :1.670
## 3rd Qu.:182.5 3rd Qu.:49.00 3rd Qu.:129.7 3rd Qu.:1.715
## Max. :243.0 Max. :65.00 Max. :264.6 Max. :1.910
##
## BMI type_obesity ARISCAT ARISCAT_group
## Min. :30.00 Class 1 Obesity: 62 Min. :23.0 Low Risk :178
## 1st Qu.:34.81 Class 2 Obesity: 55 1st Qu.:23.0 Intermediate Risk: 65
## Median :40.31 Class 3 Obesity:126 Median :23.0 High Risk : 0
## Mean :41.51 Mean :24.6
## 3rd Qu.:46.15 3rd Qu.:26.0
## Max. :77.31 Max. :42.0
##
## ASA spo2_VPO surgical_procedure CORADS atelectasis
## ASA 1: 52 Min. :88.00 SG :192 CO-RADS 1:233 Yes: 81
## ASA 2:149 1st Qu.:93.00 RYGB: 7 CO-RADS 2: 6 No :162
## ASA 3: 33 Median :96.00 OAGB: 5 CO-RADS 3: 2
## NA's : 9 Mean :94.93 LBGS: 31 CO-RADS 4: 2
## 3rd Qu.:97.00 NA's: 8
## Max. :99.00
##
## atelectasis_location atelectasis_percent hb hct
## Unilateral: 55 Min. : 0.000 Min. : 9.90 Min. :30.30
## Bilateral : 26 1st Qu.: 0.000 1st Qu.:13.90 1st Qu.:40.52
## NA's :162 Median : 0.000 Median :14.45 Median :42.60
## Mean : 2.778 Mean :14.52 Mean :42.68
## 3rd Qu.: 5.000 3rd Qu.:15.20 3rd Qu.:44.67
## Max. :27.500 Max. :18.50 Max. :52.90
## NA's :5 NA's :5
## leu neu_percent neu_absolute linf_percent
## Min. : 3.100 Min. :32.00 Min. : 1.600 Min. :14.00
## 1st Qu.: 6.600 1st Qu.:59.00 1st Qu.: 3.969 1st Qu.:30.00
## Median : 7.700 Median :63.00 Median : 4.883 Median :35.00
## Mean : 7.826 Mean :62.94 Mean : 4.956 Mean :34.87
## 3rd Qu.: 8.900 3rd Qu.:68.00 3rd Qu.: 5.894 3rd Qu.:39.00
## Max. :13.300 Max. :84.00 Max. :10.773 Max. :66.00
## NA's :5 NA's :5 NA's :5 NA's :5
## linf_absolute mon_percent mon_absolute platelets
## Min. :0.616 Min. :0.000 Min. :0.616 Min. :154.0
## 1st Qu.:2.244 1st Qu.:1.000 1st Qu.:2.244 1st Qu.:272.2
## Median :2.587 Median :2.000 Median :2.587 Median :316.0
## Mean :2.703 Mean :1.626 Mean :2.703 Mean :317.2
## 3rd Qu.:3.095 3rd Qu.:2.000 3rd Qu.:3.095 3rd Qu.:354.8
## Max. :5.676 Max. :4.000 Max. :5.676 Max. :495.0
## NA's :5 NA's :5 NA's :5 NA's :5
## glucose urea creatinine rapid_covid_test
## Min. : 59.00 Min. :10.0 Min. :0.5000 negative:243
## 1st Qu.: 74.00 1st Qu.:17.0 1st Qu.:0.6600
## Median : 83.00 Median :20.5 Median :0.7500
## Mean : 85.61 Mean :21.4 Mean :0.7577
## 3rd Qu.: 92.00 3rd Qu.:26.0 3rd Qu.:0.8400
## Max. :200.00 Max. :49.0 Max. :1.5900
## NA's :5 NA's :5 NA's :5
## PCR_covid surgery_performed state_residence altitude
## negative: 3 No : 10 Texas :87 Min. : 31.0
## NA's :240 Yes:233 Washington:39 1st Qu.: 519.0
## Utah :27 Median : 519.0
## Alberta :22 Mean : 650.1
## Florida :20 3rd Qu.: 806.0
## California:16 Max. :1861.0
## (Other) :32
## hypertension diabetes sleep_apnea hypothyroidism dyslipidemia
## No :179 No :218 No :224 No :219 No :224
## Yes: 64 Yes: 25 Yes: 19 Yes: 24 Yes: 19
##
##
##
##
##
## antidepressant_use prior_covid19 other_comorb
## No :146 No :239 No :143
## Yes: 97 Yes: 4 Yes:100
##
##
##
##
##
Participants with higher probability of having a current diagnosis of COVID-19 are expected to have chest CT alterations due to COVID-19 pneumonia. Thus, will be excluded.
Number of patients according to CO-RADS:
## CO-RADS 1 CO-RADS 2 CO-RADS 3 CO-RADS 4
## 233 6 2 2
## n
## 1 243
## n
## 1 239
Since prior COVID-19 is considered a confounder and since there are only 3 participants with prior COVID-19 which would provide difficult to assess the role of prior COVID-19 in analyses, participants with prior COVID-19 were excluded from the analysis.
## n
## 1 239
## n
## 1 236
Will remove prior_covid19 column as it no longer provides information of a varying characteristic. Similarly, rapid_covid_test does not provide additional information.
## [1] 42
## [1] 40
Missing data for PCR_covid and is explained since only patients who decided to have a test performed on their own will reported the result. The medical center did not require a negative PCR test at that time during the pandemic, reason why PCR tests were not systematically performed. As shown in earlier summary of variables, all available tests (n=3) were negative. This variable will not be analyzed further downstream:
The variable atelectasis_location has missing data since those patients who did not have atelectasis naturally do not have a location recorded. Will assess if data are missing for those who had atelectasis:
There is no missing data for atelectasis_location after sub-setting only those who had atelectasis.
Lastly, I will subset all participants without the prior variable to further assess the extent of missing data for other variables:
Missing data constitutes less than 5% for all remaining variables. Thus, will proceed with complete-case analysis without performing any data imputation procedure.
table <- data %>%
group_by(state_residence) %>%
summarize(n=n()) %>%
mutate(name=state_residence) %>%
select(-state_residence) %>%
relocate(name) %>%
arrange(desc(n))
table## # A tibble: 10 × 2
## name n
## <fct> <int>
## 1 Texas 81
## 2 Washington 39
## 3 Utah 27
## 4 Alberta 21
## 5 Florida 20
## 6 California 16
## 7 Louisiana 16
## 8 Arkansas 7
## 9 Missouri 5
## 10 Nevada 4
The following two chunks of code were adapted from contribution by cpsievert.
# us laea projection as used in albersusa::us_laea_proj
us_laea <- "+proj=laea +lat_0=45 +lon_0=-100 +x_0=0 +y_0=0 +a=6370997 +b=6370997 +units=m +no_defs"
usa <- ne_states(country = "united states of america", returnclass = "sf") %>%
st_transform(us_laea) %>%
select(name)
canada <- ne_states(country = "canada", returnclass = "sf") %>%
st_transform(us_laea) %>%
select(name)
usa <- left_join(usa, table, by="name")
canada <- left_join(canada, table, by="name") Create map with number of patients per state in Canada and the USA.
figS2 <- plot_ly(split = ~name, color = ~n, colors = "Blues",
stroke = I("black"), span = I(1),
height = 450,
width = 800) %>%
add_sf(data = usa, color = I("dimgray")) %>%
add_sf(data = canada, color = I("dimgray")) %>%
add_sf(data = filter(canada, name %in% table$name)) %>%
add_sf(data = filter(usa, name %in% table$name)) %>%
layout(showlegend = FALSE) %>%
colorbar(title = "Number of<br>patients")Create a table with the absolute frequency of patients per state to complement visualization.
table <- table %>% rename(State=name)
fig_add <- plot_ly(type="table",
domain = list(x=c(0,0.25), y=c(0,0.6)),
header=list(values=names(table)),
cells=list(values=unname(table)))Combine the prior two elements in one single figure:
Due to the large number of figures produced, I will not show these in the rendered HTML document. Plots were created with this code:
for(variable in names(data)) {
if(is.numeric(data[[variable]])) {
name <- variable
Main <- paste("Histogram of", variable, sep=" ")
hist(data[[variable]], xlab=name, main=Main,
col = "steelblue")
Main <- paste("Boxplot of", variable, sep=" ")
boxplot(data[[variable]], xlab=name, main=Main,
horizontal=TRUE,
col = "steelblue")
Main <- paste("Normal Q-Q Plot of", variable, sep=" ")
qqnorm(data[[variable]], main = Main);
qqline(data[[variable]], col = "steelblue",lwd = 2)
}
}Near normal distribution:
- Age: light tails
- height: heavy right tail, 4 outliers right
- hb: heavy tails, bilateral outliers
- hct: heavy tails, bilateral outliers
- leu: near normal, bilateral outliers
- neu_absolute: heavy right tail, two right
outliers
- linf_absolute: heavy right tail, bilateral outliers (more right)
- mon_absolute: heavy right tail, bilateral outliers (more right)
- platelets: two right outliers
- urea: four right outliers
- creatinine: three right outliers
Distribution not normal:
- Weight: right-skewed, outliers are verified observations of extreme
weight. - BMI: right-skewed, outliers are verified observations of
extreme BMI.
- spo2_VPO: Left-skewed
- neu_percent: left-skewed
- linf_percent: right-skewed
- glucose: right-skewed
- mon_percent: observations around only 5 data points. Will not use this
variable, only absolute monocytes will be used.
- altitude: distribution not clear as values are quite apart an
concentrate around single states with diferring mean altitudes. Perhaps
best to try to use smooth term later?
Outcome variable:
- atelectasis_percent: Zero-inflated. Would be difficult to manage as
categorical ordinal due to low number of patients in some categories.
Will re-assess later and decide.
Group variables by distribution, then apply statement to use output to render table 1:
normal <- c("age",
"height",
"hb",
"hct",
"leu",
"neu_absolute",
"linf_absolute",
"mon_absolute",
"platelets",
"urea",
"creatinine"
)
nonormal <- c("weight"
,"BMI",
"spo2_VPO",
"neu_percent",
"linf_percent",
"glucose",
"atelectasis_percent",
"mon_percent",
"altitude"
)
for(name in normal) cat(name,"=", switch(EXPR = name, '"Mean (SD)",'),"\n")## age = "Mean (SD)",
## height = "Mean (SD)",
## hb = "Mean (SD)",
## hct = "Mean (SD)",
## leu = "Mean (SD)",
## neu_absolute = "Mean (SD)",
## linf_absolute = "Mean (SD)",
## mon_absolute = "Mean (SD)",
## platelets = "Mean (SD)",
## urea = "Mean (SD)",
## creatinine = "Mean (SD)",
## weight = "Median [Q1, Q3]",
## BMI = "Median [Q1, Q3]",
## spo2_VPO = "Median [Q1, Q3]",
## neu_percent = "Median [Q1, Q3]",
## linf_percent = "Median [Q1, Q3]",
## glucose = "Median [Q1, Q3]",
## atelectasis_percent = "Median [Q1, Q3]",
## mon_percent = "Median [Q1, Q3]",
## altitude = "Median [Q1, Q3]",
Log abbreviations
abbreviations = sort(c("body-mass index (BMI)",
"peripheral saturation of oxygen (SpO2)",
"COVID-19 Reporting and Data System (CO-RADS)",
"sleeve gastrectomy (SG)",
"roux-en-Y gastric bypass (RYGB)",
"one anastomosis gastric bypass (OAGB)",
"lap-band to gastric sleeve (LBGS)",
"coronavirus disease (COVID-19)",
"Assess Respiratory Risk in Surgical Patients in Catalonia (ARISCAT)",
"white blood cell (WBC)")
)
abbreviations_stats = sort(c("standard deviation (SD)",
"interquartile range (IQR)",
"percentage (%)",
"25th percentile (Q1)",
"75th percentile (Q3)")
)Generate Table 1 with render function
rndr <- function(x, name, ...) {
if (!is.numeric(x)) return(render.categorical.default(x))
what <- switch(name,
age = "Mean (SD)",
height = "Mean (SD)",
hb = "Mean (SD)",
hct = "Mean (SD)",
leu = "Mean (SD)",
neu_absolute = "Mean (SD)",
linf_absolute = "Mean (SD)",
mon_absolute = "Mean (SD)",
platelets = "Mean (SD)",
urea = "Mean (SD)",
creatinine = "Mean (SD)",
weight = "Median [Q1, Q3]",
BMI = "Median [Q1, Q3]",
spo2_VPO = "Median [Q1, Q3]",
neu_percent = "Median [Q1, Q3]",
linf_percent = "Median [Q1, Q3]",
glucose = "Median [Q1, Q3]",
atelectasis_percent = "Median [Q1, Q3]",
mon_percent = "Median [Q1, Q3]",
altitude = "Median [Q1, Q3]"
)
parse.abbrev.render.code(c("", what))(x)
}
table1 <- table1(~ sex + age + weight + height + BMI + surgical_procedure + ARISCAT_group + spo2_VPO + altitude + hypertension + diabetes + sleep_apnea + hypothyroidism + dyslipidemia + antidepressant_use + CORADS + glucose + creatinine + urea + hb + hct + leu + neu_absolute + linf_absolute + mon_absolute + platelets | type_obesity,
data=data,
overall=c(left="Total"),
render=rndr,
caption="Clinical characteristics of patients, according to obesity class.",
footnote=c(abbreviations,abbreviations_stats)
)
table1| Total (N=236) |
Class 1 Obesity (N=62) |
Class 2 Obesity (N=53) |
Class 3 Obesity (N=121) |
|
|---|---|---|---|---|
Assess Respiratory Risk in Surgical Patients in Catalonia (ARISCAT) body-mass index (BMI) coronavirus disease (COVID-19) COVID-19 Reporting and Data System (CO-RADS) lap-band to gastric sleeve (LBGS) one anastomosis gastric bypass (OAGB) peripheral saturation of oxygen (SpO2) roux-en-Y gastric bypass (RYGB) sleeve gastrectomy (SG) white blood cell (WBC) 25th percentile (Q1) 75th percentile (Q3) interquartile range (IQR) percentage (%) standard deviation (SD) | ||||
| sex | ||||
| Woman | 214 (90.7%) | 60 (96.8%) | 48 (90.6%) | 106 (87.6%) |
| Man | 22 (9.3%) | 2 (3.2%) | 5 (9.4%) | 15 (12.4%) |
| Age (years) | ||||
| Mean (SD) | 40.3 (9.87) | 42.1 (10.3) | 40.8 (9.25) | 39.1 (9.82) |
| Weight (kilograms (kg)) | ||||
| Median [Q1, Q3] | 111 [97.4, 130] | 88.8 [84.2, 95.7] | 107 [102, 112] | 128 [114, 142] |
| Height (meters (m)) | ||||
| Mean (SD) | 1.67 (0.0822) | 1.66 (0.0631) | 1.69 (0.0856) | 1.67 (0.0889) |
| BMI | ||||
| Median [Q1, Q3] | 40.3 [34.6, 46.0] | 33.0 [31.5, 33.8] | 38.3 [36.6, 39.1] | 45.6 [42.2, 51.1] |
| surgical_procedure | ||||
| SG | 189 (80.1%) | 52 (83.9%) | 41 (77.4%) | 96 (79.3%) |
| RYGB | 6 (2.5%) | 1 (1.6%) | 1 (1.9%) | 4 (3.3%) |
| OAGB | 5 (2.1%) | 1 (1.6%) | 1 (1.9%) | 3 (2.5%) |
| LBGS | 31 (13.1%) | 5 (8.1%) | 9 (17.0%) | 17 (14.0%) |
| ARISCAT_group | ||||
| Low Risk | 175 (74.2%) | 44 (71.0%) | 41 (77.4%) | 90 (74.4%) |
| Intermediate Risk | 61 (25.8%) | 18 (29.0%) | 12 (22.6%) | 31 (25.6%) |
| Oxygen saturation (SpO2) (%) | ||||
| Median [Q1, Q3] | 96.0 [93.0, 97.0] | 97.0 [95.0, 97.8] | 96.0 [94.0, 97.0] | 94.0 [92.0, 97.0] |
| Mean altitude (meters) | ||||
| Median [Q1, Q3] | 519 [519, 806] | 519 [313, 806] | 519 [519, 885] | 519 [519, 806] |
| hypertension | ||||
| No | 177 (75.0%) | 52 (83.9%) | 40 (75.5%) | 85 (70.2%) |
| Yes | 59 (25.0%) | 10 (16.1%) | 13 (24.5%) | 36 (29.8%) |
| diabetes | ||||
| No | 211 (89.4%) | 58 (93.5%) | 48 (90.6%) | 105 (86.8%) |
| Yes | 25 (10.6%) | 4 (6.5%) | 5 (9.4%) | 16 (13.2%) |
| sleep_apnea | ||||
| No | 218 (92.4%) | 60 (96.8%) | 50 (94.3%) | 108 (89.3%) |
| Yes | 18 (7.6%) | 2 (3.2%) | 3 (5.7%) | 13 (10.7%) |
| hypothyroidism | ||||
| No | 213 (90.3%) | 55 (88.7%) | 50 (94.3%) | 108 (89.3%) |
| Yes | 23 (9.7%) | 7 (11.3%) | 3 (5.7%) | 13 (10.7%) |
| dyslipidemia | ||||
| No | 218 (92.4%) | 58 (93.5%) | 48 (90.6%) | 112 (92.6%) |
| Yes | 18 (7.6%) | 4 (6.5%) | 5 (9.4%) | 9 (7.4%) |
| antidepressant_use | ||||
| No | 142 (60.2%) | 36 (58.1%) | 33 (62.3%) | 73 (60.3%) |
| Yes | 94 (39.8%) | 26 (41.9%) | 20 (37.7%) | 48 (39.7%) |
| CORADS | ||||
| CO-RADS 1 | 230 (97.5%) | 61 (98.4%) | 51 (96.2%) | 118 (97.5%) |
| CO-RADS 2 | 6 (2.5%) | 1 (1.6%) | 2 (3.8%) | 3 (2.5%) |
| Glucose (mg/dL) | ||||
| Median [Q1, Q3] | 83.0 [74.0, 92.0] | 83.0 [77.0, 90.0] | 81.0 [70.0, 92.0] | 83.0 [74.0, 92.0] |
| Creatinine (mg/dL) | ||||
| Mean (SD) | 0.758 (0.146) | 0.773 (0.115) | 0.744 (0.144) | 0.757 (0.160) |
| Urea (mg/dL) | ||||
| Mean (SD) | 21.4 (6.70) | 22.9 (6.08) | 20.5 (6.77) | 21.1 (6.89) |
| Hemoglobin (g/dL) | ||||
| Mean (SD) | 14.5 (1.21) | 14.5 (1.20) | 14.5 (1.17) | 14.6 (1.24) |
| Hematocrit (%) | ||||
| Mean (SD) | 42.8 (3.33) | 42.6 (3.32) | 42.6 (3.22) | 42.9 (3.41) |
| WBC count (10³/µL) | ||||
| Mean (SD) | 7.83 (1.76) | 7.81 (1.74) | 7.71 (1.76) | 7.89 (1.78) |
| Neutrophils (absolute) (10³/µL) | ||||
| Mean (SD) | 4.97 (1.42) | 4.94 (1.39) | 4.83 (1.39) | 5.04 (1.46) |
| Lymphocytes (absolute) (10³/µL) | ||||
| Mean (SD) | 2.70 (0.811) | 2.71 (0.802) | 2.70 (0.920) | 2.69 (0.771) |
| Monocytes (absolute) (10³/µL) | ||||
| Mean (SD) | 2.70 (0.811) | 2.71 (0.802) | 2.70 (0.920) | 2.69 (0.771) |
| Platelets (cells/µL) | ||||
| Mean (SD) | 316 (64.4) | 307 (67.6) | 319 (63.2) | 320 (63.2) |
NOTE: The ASA physical status variable has not been included in analyses since the updated version of ASA consulted on October 2023 classifies obesity (30<BMI<40) as ASA 2 and obesity (BMI ≥40) as ASA 3. The distribution of frequencies of ASA~obesity class in this dataset does not match such definition. This occurred since an outdated version of ASA that did not include obesity was likely used by clinicians when writing the preoperative assessment medical note:
##
## Class 1 Obesity Class 2 Obesity Class 3 Obesity
## ASA 1 31 18 3
## ASA 2 29 34 85
## ASA 3 0 0 32
Save Table 1:
t1flex(table1) %>% save_as_html(path = paste(tabfolder,"/Table1.html",sep=""))
properties <- prop_section(
page_size = page_size(orient = "landscape",
width = 8.3, height = 11.7),
type = "continuous",
page_margins = page_mar()
)
t1flex(table1) %>%
save_as_docx(path = paste(tabfolder,"/Table1.docx",sep=""),
pr_section = properties
)The selection of variables that will be assessed is according to the following directed acyclic graph which will be used again before statistical modelling, to assess conditional independencies.
DAG <- dagitty( 'dag {
bb="-3.687,-2.637,5.96,2.575"
age [adjusted,pos="-0.929,-0.900"]
altitude_cat [pos="4.259,0.627"]
atelectasis_percent [outcome,pos="-0.258,-2.137"]
hb [pos="-0.789,2.123"]
sex [adjusted,pos="-2.995,-0.744"]
sleep_apnea [adjusted,pos="0.108,0.783"]
spo2_VPO [outcome,pos="2.079,0.078"]
type_obesity [exposure,pos="-2.883,0.520"]
age -> atelectasis_percent
age -> hb
age -> sleep_apnea
age -> spo2_VPO
age -> type_obesity
altitude_cat -> atelectasis_percent
altitude_cat -> hb
altitude_cat -> sleep_apnea
altitude_cat -> spo2_VPO
atelectasis_percent -> hb
atelectasis_percent -> spo2_VPO
hb -> spo2_VPO
sex -> atelectasis_percent
sex -> hb
sex -> sleep_apnea
sex -> spo2_VPO
sex -> type_obesity
sleep_apnea -> atelectasis_percent
sleep_apnea -> spo2_VPO
sleep_apnea -> type_obesity
type_obesity -> atelectasis_percent
type_obesity -> spo2_VPO
}
' )
plot(DAG)Other variables that are potential confounders are not shown in this
DAG since they were addressed by design in this study as follows:
- Current COVID-19: Exclusion criteria were applied to
n=2 patients with CO-RADS 3 and n=2
with CO-RADS 4. Only participants with low probability of COVID-19
(CO-RADS 1 and 2) were included in this study.
- Prior COVID-19: This was an exclusion criterion
(n=3).
- Bronchiectasis: This was an exclusion criterion (n=0).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 20.00 32.75 40.00 40.26 48.25 65.00
Distribution of age was assessed earlier.
The mean age was 40.3 (SD: 9.87).
## sex
## Woman Man
## 214 22
## sex
## Woman Man
## 90.7 9.3
Most patients in the sample were woman (n=214, 90.7%).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 30.00 34.63 40.30 41.37 46.02 77.31
## type_obesity
## Class 1 Obesity Class 2 Obesity Class 3 Obesity
## 62 53 121
## type_obesity
## Class 1 Obesity Class 2 Obesity Class 3 Obesity
## 26.3 22.5 51.3
Distribution of BMI was assessed earlier. It is right-skewed due to extreme values (verified outliers). The WHO classification of BMI for obesity class will be used to complement descriptions and for potential use later during statistical modelling.
The median BMI was 40.295 (IQR: 34.63- 46.02). The distribution of BMI was right-skewed due to extreme BMI values (range: 30- 77.31). Most patients were in the class 3 obesity category (n=121, 51.3%), followed by class 1 (n=62, 26.3%) and 2 (n=53, 22.5%).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 88 93 96 95 97 99
Distribution of SpO2 during the pre-anesthetic assessment was presented earlier. It is left-skewed due to some participants exhibiting decreased SpO2. Will categorize according to clinical categories to assess the proportion of patients with decreased SpO2.
First, create SpO2 breaks:
data <- data %>%
mutate(spo2_cat = cut(spo2_VPO,
breaks=c(87,90,94,100),
right=TRUE,
labels=c("≤90","90 to 94",">94")
)
)## spo2_cat
## ≤90 90 to 94 >94
## 15 75 146
## spo2_cat
## ≤90 90 to 94 >94
## 6.4 31.8 61.9
The median SpO2 during the pre-anethetic assessment was 96 (IQR: 93-97) %, with a minimum value of 88%. Of these, n=146 (61.9%) had normal SpO2 (above 94%), whereas n=75 (31.8%) had a value in the 90-94% range, and n=15 (6.4%) had ≤90%.
## sleep_apnea
## No Yes
## 218 18
## sleep_apnea
## No Yes
## 92.4 7.6
Patients with a diagnosis of OSA were 7.6% (n=18) of the sample.
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 31.0 519.0 519.0 652.7 806.0 1861.0
Distribution of altitude was assessed earlier. Cannot assume normal distribution. Thus, I will create a new variable categorizing values according to the study by Crocker ME, et al.
data <- data %>%
mutate(altitude_cat = cut(altitude,
breaks=c(0,1000,2500),
right=FALSE,
labels=c("Low altitude","Moderate altitude")
)
)## altitude_cat
## Low altitude Moderate altitude
## 205 31
## altitude_cat
## Low altitude Moderate altitude
## 86.9 13.1
Characteristics of participants according to BMI class are shown in Table 1.
Scatterplot
Relationship does not seem to be linear (also, variables were not normally distributed, with outliers), but suggests a negative correlation. Will assess if a smooth BMI term explains SpO2 better, and if so, what is the best number of knots to model this relationship:
GAM model with k=2:
##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(BMI, k = 2)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94.9958 0.1436 661.5 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 1.923 1.994 61.61 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.349 Deviance explained = 35.4%
## GCV = 4.9287 Scale est. = 4.8677 n = 236
## k' edf k-index p-value
## s(BMI) 2 1.923156 1.035767 0.665
GAM model with k=4:
##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(BMI, k = 4)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94.996 0.141 673.6 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 2.835 2.98 47.86 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.372 Deviance explained = 38%
## GCV = 4.7712 Scale est. = 4.6937 n = 236
## k' edf k-index p-value
## s(BMI) 3 2.834552 1.074122 0.845
GAM model with k=6:
##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(BMI, k = 6)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94.9958 0.1396 680.5 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 4.149 4.678 32.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.385 Deviance explained = 39.6%
## GCV = 4.7022 Scale est. = 4.5996 n = 236
## k' edf k-index p-value
## s(BMI) 5 4.148805 1.102576 0.9425
GAM model with k=8:
##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(BMI, k = 8)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94.9958 0.1394 681.5 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 4.723 5.629 26.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.387 Deviance explained = 39.9%
## GCV = 4.7001 Scale est. = 4.5862 n = 236
## k' edf k-index p-value
## s(BMI) 7 4.723071 1.108757 0.975
GAM model with k=12:
##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(BMI, k = 12)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94.9958 0.1395 680.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 4.616 5.685 26.16 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.385 Deviance explained = 39.8%
## GCV = 4.7067 Scale est. = 4.5947 n = 236
## k' edf k-index p-value
## s(BMI) 11 4.615804 1.106134 0.955
Best AIC:
## [[1]]
## [1] 1048.14
##
## [[2]]
## [1] 1040.448
##
## [[3]]
## [1] 1036.959
##
## [[4]]
## [1] 1036.83
##
## [[5]]
## [1] 1037.165
All models are significantly better than linear. Thus, using a smooth term for BMI is better than modelling a linear relationship.
Regarding AIC, the models with k>6 are not better at explaining the variance. Will try a model with k=5 since the best model is expected to be anywhere between k=4 and k=6:
GAM model with k=5 (4 degrees of freedom):
##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(BMI, k = 5)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94.9958 0.1399 679.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 3.685 3.941 37.81 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.382 Deviance explained = 39.2%
## GCV = 4.7121 Scale est. = 4.6185 n = 236
## k' edf k-index p-value
## s(BMI) 4 3.685135 1.09624 0.9325
Best AIC:
## [[1]]
## [1] 1040.448
##
## [[2]]
## [1] 1037.475
##
## [[3]]
## [1] 1036.959
Model with k=5 still offers and advantage compared to k=4 (drop in AIC). No other improvements in k-index or visual representation are achieved with higher k. Thus, will use k=5 to model.
fig1b <- ggplot(data, aes(BMI,spo2_VPO)) +
geom_point(size=0.6,color="gray60") +
geom_smooth(method="gam",formula = y ~ s(x, bs = "cs", k = 5), color="cadetblue4") +
labs(y="SpO2 (%)",x="Body mass index (kg/m²)",tag="B") +
ylim(83,100) + xlim(30,80) +
theme_bw() +
theme(panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.text.x = element_text(size=rel(1.2)),
axis.text.y = element_text(size=rel(1.2))
)
fig1bNegative non-monotonic relationship since SpO2 decreases, but then seems to increase slightly again at BMI 40, followed by a marked decrease as BMI decreases at values higher than ~42.
Spearman’s correlation coefficient shouldn’t be used due to relationship not being monotonically decreasing. However, I will calculate it just to have a rough idea (but will not report this in the paper).
##
## Spearman's rank correlation rho
##
## data: spo2_VPO and BMI
## S = 3105183, p-value = 2.28e-11
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.417458
Scatterplot
Datapoints scattered. Relationship probably linear, but there are influential true outliers with extreme BMI. Will assess with Spearman correlation analysis due to extreme BMI values.
##
## Spearman's rank correlation rho
##
## data: age and BMI
## S = 2530759, p-value = 0.017
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.1552445
Age had a weak negative correlation with BMI (rho= -0.155, p=0.017).
data_BMI <- data %>% group_by(sex)
median_bmi <- data_BMI %>%
summarize(n = n(),
BMI_median = median(BMI),
Q1 = quantile(BMI,0.25),
Q3 = quantile(BMI,0.75),
min = min(BMI),
max = max(BMI)
)
median_bmi## # A tibble: 2 × 7
## sex n BMI_median Q1 Q3 min max
## <fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Woman 214 39.8 34.5 45.5 30 74.9
## 2 Man 22 43.0 37.9 46.2 30.8 77.3
Distribution not normal and influential outliers. Will assess non-parametrically.
##
## Wilcoxon rank sum test with continuity correction
##
## data: BMI by sex
## W = 1918.5, p-value = 0.1537
## alternative hypothesis: true location shift is not equal to 0
The median BMI was not different between men (43, IQR: 37.9-46.2) and women (39.9, IQR: 34.5-45.5) (p=0.154).
Distribution not normal and influential outliers. Will assess non-parametrically.
data_BMI <- data %>% group_by(sleep_apnea)
median_bmi <- data_BMI %>%
summarize(n = n(),
BMI_median = median(BMI),
Q1 = quantile(BMI,0.25),
Q3 = quantile(BMI,0.75),
min = min(BMI),
max = max(BMI)
)
median_bmi## # A tibble: 2 × 7
## sleep_apnea n BMI_median Q1 Q3 min max
## <fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 No 218 39.8 34.5 45.1 30 74.9
## 2 Yes 18 44.0 40.1 49.2 32.4 77.3
##
## Wilcoxon rank sum test with continuity correction
##
## data: BMI by sleep_apnea
## W = 1274, p-value = 0.01353
## alternative hypothesis: true location shift is not equal to 0
The median BMI was significantly higher in participants with sleep apnea (44, IQR: 40.1-49.2) compared to those without OSA (39.8, IQR: 34.5-45.1) (p=0.014).
Scatterplot
Do not seem to be correlated. Will apply Spearman’s correlation test:
##
## Spearman's rank correlation rho
##
## data: spo2_VPO and age
## S = 2143192, p-value = 0.7405
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.02167287
Age and SpO2 were not correlated (rho= 0.022, p=0.74).
Assess distribution of age by sex:
data_age <- data %>% group_by(sex)
ggplot(data_age,aes(x = age)) +
geom_histogram(fill = "firebrick3", colour = "black") +
facet_grid(sex ~ .)Distribution near-normal, but light tails for women. However, t-test could be robust to deviations from normality and differences in group size. Will assess mean and variance for further testing:
mean_age <- data_age %>%
summarise(n=n(),
age_mean = mean(age),
sd = sd(age),
variance = var(age)
)
mean_age## # A tibble: 2 × 5
## sex n age_mean sd variance
## <fct> <int> <dbl> <dbl> <dbl>
## 1 Woman 214 40.2 9.94 98.9
## 2 Man 22 40.6 9.28 86.1
Variances are similar. However, group sizes differ my 10x. Welch’s t-test more suitable:
##
## Welch Two Sample t-test
##
## data: age by sex
## t = -0.19917, df = 26.213, p-value = 0.8437
## alternative hypothesis: true difference in means between group Woman and group Man is not equal to 0
## 95 percent confidence interval:
## -4.715913 3.882438
## sample estimates:
## mean in group Woman mean in group Man
## 40.21963 40.63636
Mean age was similar bethween men (40.6, sd:9.3) and women (40.2, sd:9.9) (p=0.844).
data_age <- data %>% group_by(sleep_apnea)
ggplot(data_age, aes(x = age, fill=sleep_apnea)) +
geom_histogram(position = "identity", alpha = 0.4)Distribution near-normal. Will assess mean and variance for further testing.
mean_age <- data_age %>%
summarise(n=n(),
age_mean = mean(age),
sd = sd(age),
variance = var(age)
)
mean_age## # A tibble: 2 × 5
## sleep_apnea n age_mean sd variance
## <fct> <int> <dbl> <dbl> <dbl>
## 1 No 218 40.2 10.0 100.
## 2 Yes 18 41.4 8.19 67.1
Size per group very different, variances do not look similar. Welch’s t-test more suitable:
##
## Welch Two Sample t-test
##
## data: age by sleep_apnea
## t = -0.59817, df = 21.42, p-value = 0.556
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
## -5.473186 3.025683
## sample estimates:
## mean in group No mean in group Yes
## 40.16514 41.38889
Age was not significantly different between participants with OSA (41.4, sd:8.2) and those without (40.2, sd:10) (p=0.556).
data_spo2 <- data %>% group_by(sex)
ggplot(data_spo2, aes(x = spo2_VPO, fill=sex)) +
geom_histogram(position = "identity", alpha = 0.4)Distribution deviates from normal and small group size for men. Will assess non-parametrically.
median_spo2 <- data_spo2 %>%
summarize(n = n(),
spo2_median = median(spo2_VPO),
Q1 = quantile(spo2_VPO,0.25),
Q3 = quantile(spo2_VPO,0.75),
min = min(spo2_VPO),
max = max(spo2_VPO)
)
median_spo2## # A tibble: 2 × 7
## sex n spo2_median Q1 Q3 min max
## <fct> <int> <dbl> <dbl> <dbl> <int> <int>
## 1 Woman 214 96 93 97 88 99
## 2 Man 22 94.5 92 97.8 88 98
##
## Wilcoxon rank sum test with continuity correction
##
## data: spo2_VPO by sex
## W = 2602, p-value = 0.413
## alternative hypothesis: true location shift is not equal to 0
The median SpO2 was not different between men (94.5, IQR: 92-97.8) and women (96, IQR: 93-97) (p=0.413).
data_spo2 <- data %>% group_by(sleep_apnea)
ggplot(data_spo2, aes(x = spo2_VPO, fill=sleep_apnea)) +
geom_histogram(position = "identity", alpha = 0.4)Distribution not normal, and smaller group size for those with sleep apnea. Will assess non-parametrically.
median_spo2 <- data_spo2 %>%
summarize(n = n(),
spo2_median = median(spo2_VPO),
Q1 = quantile(spo2_VPO,0.25),
Q3 = quantile(spo2_VPO,0.75),
min = min(spo2_VPO),
max = max(spo2_VPO)
)
median_spo2## # A tibble: 2 × 7
## sleep_apnea n spo2_median Q1 Q3 min max
## <fct> <int> <dbl> <dbl> <dbl> <int> <int>
## 1 No 218 96 93 97 88 99
## 2 Yes 18 92 91 93 88 94
##
## Wilcoxon rank sum test with continuity correction
##
## data: spo2_VPO by sleep_apnea
## W = 3350, p-value = 4.973e-07
## alternative hypothesis: true location shift is not equal to 0
Patients with sleep apnea had a lower median SpO2 (92, IQR: 91-93) than those without OSA (96, IQR: 93-97) (p<0.001).
Scatterplot
plot(spo2_VPO~altitude, data=data,
main="Scatterplot",
xlab="Mean altitude (meters)",
ylab="SpO2 (%)"
)
abline(lm(spo2_VPO~altitude),col="steelblue")There does not seem to be a pattern.
Would a smooth term be useful to model altitude?
ggplot(data, aes(altitude,spo2_VPO)) +
geom_point(size=0.6,color="gray40") +
geom_smooth(method="loess", color="cadetblue4") +
ylab("SpO2 (%)") + xlab("Mean altitude (meters)") +
ylim(85,100) + xlim(0,2000) +
theme_bw() +
theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.text.x = element_text(size=rel(1.2)),
axis.text.y = element_text(size=rel(1.2))
)It is likely that a smooth term for SpO2 would be non-informative since there is no clear reasonable pattern in this smooth plot. Additionally, it is well known that any impacts in SpO2 due to altitudes up to 2000 are very limited (i.e 1 to 2 units). REF.
I will still check if a smooth term may be better than linear in case that adjustment for this variable is needed.
GAM model with k=2:
##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(altitude, k = 2)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94.9958 0.1779 533.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(altitude) 1.542 1.79 0.519 0.58
##
## R-sq.(adj) = 0.000785 Deviance explained = 0.734%
## GCV = 7.5521 Scale est. = 7.4707 n = 236
## k' edf k-index p-value
## s(altitude) 2 1.542225 1.018301 0.61
##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(altitude, k = 4)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94.996 0.178 533.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(altitude) 1.505 1.798 0.437 0.631
##
## R-sq.(adj) = 0.000124 Deviance explained = 0.653%
## GCV = 7.5559 Scale est. = 7.4757 n = 236
## k' edf k-index p-value
## s(altitude) 3 1.504893 1.017622 0.5925
Smooth term is not significantly better than one assuming linearity. Furthermore, the relationship with SpO2 in smooth term does not make any sense (i.e., according to prior reference, SpO2 should decrease at higher altitudes). Thus, it would be very likely that including this term would only explain noise in any case, not the true known causal relationship between SpO2 and altitude.
Lastly, will check the pattern according to altitude categories, which may be a better term to use in models in any case.
data_spo2 <- data %>% group_by(altitude_cat)
ggplot(data_spo2, aes(x = spo2_VPO, fill=altitude_cat)) +
geom_histogram(position = "identity", alpha = 0.4)Distribution deviates from normal and small group size for the moderate altitude group. Will assess non-parametrically.
median_spo2 <- data_spo2 %>%
summarize(n = n(),
spo2_median = median(spo2_VPO),
Q1 = quantile(spo2_VPO,0.25),
Q3 = quantile(spo2_VPO,0.75),
min = min(spo2_VPO),
max = max(spo2_VPO)
)
median_spo2## # A tibble: 2 × 7
## altitude_cat n spo2_median Q1 Q3 min max
## <fct> <int> <int> <dbl> <dbl> <int> <int>
## 1 Low altitude 205 96 93 97 88 99
## 2 Moderate altitude 31 96 92 97 88 99
##
## Wilcoxon rank sum test with continuity correction
##
## data: spo2_VPO by altitude_cat
## W = 3360, p-value = 0.6043
## alternative hypothesis: true location shift is not equal to 0
The median SpO2 was not different between low and moderate altitude categories (p=0.604).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 9.90 13.90 14.50 14.54 15.20 18.50 2
Scatterplot
plot(spo2_VPO~hb, data=data,
main="Scatterplot",
xlab="Hemoglobin (g/dL)",
ylab="SpO2 (%)"
)
abline(lm(spo2_VPO~altitude),col="red")There does not seem to be a clear pattern.
Would a smooth term be useful to model SpO2?
ggplot(data, aes(hb,spo2_VPO)) +
geom_point(size=0.6,color="gray40") +
geom_smooth(method="loess", color="red") +
ylab("SpO2 (%)") + xlab("Hemoglobin (g/dL)") +
ylim(85,100) +
theme_bw() +
theme(panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.text.x = element_text(size=rel(1.2)),
axis.text.y = element_text(size=rel(1.2))
)Hemoglobin likely has an effect on SpO2 at lower hemoglobin values, which makes sense with what is observed in the graph. Assuming a linear relationship could lead to incorrect conclusions according to this. Nonetheless, it looks like the apparent non-linear relationship at low Hb values is due to only 2 observations with wide confidence intervals showing that the true slope could go either up, straight or down, so it may also be incorrect to assume a non-linear relationship based only on this plot. I will model to see if there is an optimal smooth term for hemoglobin or if a linear term best fits the data:
GAM model with k=2:
##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(hb, k = 2)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94.9829 0.1789 530.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(hb) 1 1 0.272 0.603
##
## R-sq.(adj) = -0.00314 Deviance explained = 0.117%
## GCV = 7.5555 Scale est. = 7.4909 n = 234
## k' edf k-index p-value
## s(hb) 2 1 0.9302737 0.13
GAM model with k=4:
##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(hb, k = 4)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94.9829 0.1789 530.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(hb) 1 1 0.272 0.603
##
## R-sq.(adj) = -0.00314 Deviance explained = 0.117%
## GCV = 7.5555 Scale est. = 7.4909 n = 234
## k' edf k-index p-value
## s(hb) 3 1 0.9302737 0.1375
The estimated degrees of freedom (edf) in both cases were 1, plus p=0.6, meaning that a linear term is better fitted to this data than a non-linear term.
##
## Spearman's rank correlation rho
##
## data: spo2_VPO and hb
## S = 2274841, p-value = 0.3201
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.06527711
SpO2 and hemoglobin were not correlated (rho= -0.065, p=0.32).
Mean expected frequency:
mean_exp <- data %>%
drop_na(sex,sleep_apnea) %>%
summarize(mean_expected_freq = n()/(nlevels(sex)*nlevels(sleep_apnea)))
mean_exp## mean_expected_freq
## 1 59
Since value is grater than 5.0, chi-squared without continuity correction is appropriate.
## sleep_apnea
## sex No Yes
## Woman 204 10
## Man 14 8
## sleep_apnea
## sex No Yes
## Woman 0.8644 0.0424
## Man 0.0593 0.0339
Mosaic Plot
ggplot(data = data) +
geom_mosaic(aes(x = product(sex,sleep_apnea), fill=sex),na.rm = TRUE) +
scale_fill_manual(values=c("peachpuff","sandybrown")) +
theme_mosaic() ##
## Pearson's Chi-squared test
##
## data: freq
## X-squared = 28.437, df = 1, p-value = 9.68e-08
Percentage by level (women, men):
## sleep_apnea
## sex No Yes
## Woman 95.33 4.67
## Man 63.64 36.36
Sex was associated with OSA (p<0.001) as men had the diagnosis more frequently compared to women.
Corroborate that atelectasis(Yes/No) matches atelectasis percent equal or different to 0%:
## atelectasis_percent
## atelectasis 0 2.5 5 7.5 10 12.5 15 17.5 27.5
## Yes 0 11 14 33 6 1 4 7 1
## No 159 0 0 0 0 0 0 0 0
Yes, these do match.
freq <- table(atelectasis)
percent <- round((prop.table(freq)*100),2)
total <- rbind(freq,percent)
total## Yes No
## freq 77.00 159.00
## percent 32.63 67.37
Prevalence of atelectasis with 95% confidence interval
prev_atelec <- prop.test(freq, correct=FALSE)
confint <- round((prev_atelec$conf.int*100),2)
prev_atelec##
## 1-sample proportions test without continuity correction
##
## data: freq, null probability 0.5
## X-squared = 28.492, df = 1, p-value = 9.411e-08
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
## 0.2696526 0.3884549
## sample estimates:
## p
## 0.3262712
The prevalence of atelectasis was 32.63 (95%CI: 26.97, 38.85).
Mean expected frequency:
mean_exp <- data %>%
drop_na(type_obesity, atelectasis) %>%
summarize(mean_expected_freq = n()/(nlevels(type_obesity)*nlevels(atelectasis)))
mean_exp## mean_expected_freq
## 1 39.33333
## atelectasis
## type_obesity Yes No
## Class 1 Obesity 8 54
## Class 2 Obesity 15 38
## Class 3 Obesity 54 67
## atelectasis
## type_obesity Yes No
## Class 1 Obesity 0.0339 0.2288
## Class 2 Obesity 0.0636 0.1610
## Class 3 Obesity 0.2288 0.2839
Mosaic Plot
data %>%
mutate(atelectasis = fct_relevel(atelectasis, "No", "Yes")) %>%
ggplot() +
geom_mosaic(aes(x = product(atelectasis,type_obesity), fill=atelectasis),na.rm = TRUE) +
scale_fill_manual(values=c("grey95","lightsteelblue4")) +
theme_mosaic() +
theme(axis.text.x=element_text(size=rel(0.8)))##
## Pearson's Chi-squared test
##
## data: freq
## X-squared = 19.352, df = 2, p-value = 6.279e-05
Mean expected frequency:
mean_exp <- data %>%
drop_na(type_obesity, atelectasis_location) %>%
summarize(mean_expected_freq = n()/(nlevels(type_obesity)*nlevels(atelectasis_location)))
mean_exp## mean_expected_freq
## 1 12.83333
Mean expected frequency is greater than 5.0, so chi-squared without continuity correction is adequate.
## atelectasis_location
## type_obesity Unilateral Bilateral
## Class 1 Obesity 7 1
## Class 2 Obesity 10 5
## Class 3 Obesity 36 18
## atelectasis_location
## type_obesity Unilateral Bilateral
## Class 1 Obesity 0.0909 0.0130
## Class 2 Obesity 0.1299 0.0649
## Class 3 Obesity 0.4675 0.2338
Mosaic Plot
ggplot(data = data) +
geom_mosaic(aes(x = product(type_obesity,atelectasis_location),
fill=type_obesity),na.rm = TRUE) +
scale_fill_manual(values=c("seagreen1","seagreen3","seagreen4")) +
theme_mosaic() ##
## Pearson's Chi-squared test
##
## data: freq
## X-squared = 1.4503, df = 2, p-value = 0.4843
Prevalence of atelectasis with 95% confidence intervals
#This is the prevalence of atelectasis for the total sample:
atelectasis_total <- data %>% group_by(atelectasis) %>%
summarise(n = n()) %>%
mutate(prev = n / sum(n)*100,
lower = lapply(n, prop.test, n = sum(n)),
upper = sapply(lower, function(x) x$conf.int[2])*100,
lower = sapply(lower, function(x) x$conf.int[1])*100,
type_obesity = "Total") %>%
mutate_at(3:5, round, 2)
atelectasis_total$confint <- paste(atelectasis_total$lower, "-", atelectasis_total$upper)
atelectasis_total <- atelectasis_total %>% select(-c(lower,upper))
#This is the prevalence of atelectasis by obesity class:
atelectasis_obesity <- data %>% group_by(type_obesity, atelectasis) %>%
summarise(n = n()) %>%
mutate(prev = n / sum(n)*100,
lower = lapply(n, prop.test, n = sum(n)),
upper = sapply(lower, function(x) x$conf.int[2])*100,
lower = sapply(lower, function(x) x$conf.int[1])*100) %>%
mutate_at(4:6, round, 2)
atelectasis_obesity$confint <- paste(atelectasis_obesity$lower, "-", atelectasis_obesity$upper)
atelectasis_obesity <- atelectasis_obesity %>% select(-c(lower,upper))
#This is just to combine the two prior tables:
atelectasis <- atelectasis_total %>%
bind_rows(atelectasis_obesity) %>%
relocate(type_obesity, .before = n)
atelectasis## # A tibble: 8 × 5
## atelectasis type_obesity n prev confint
## <fct> <chr> <int> <dbl> <chr>
## 1 Yes Total 77 32.6 26.77 - 39.06
## 2 No Total 159 67.4 60.94 - 73.23
## 3 Yes Class 1 Obesity 8 12.9 6.13 - 24.4
## 4 No Class 1 Obesity 54 87.1 75.6 - 93.87
## 5 Yes Class 2 Obesity 15 28.3 17.2 - 42.56
## 6 No Class 2 Obesity 38 71.7 57.44 - 82.8
## 7 Yes Class 3 Obesity 54 44.6 35.68 - 53.92
## 8 No Class 3 Obesity 67 55.4 46.08 - 64.32
Location of atelectasis (Unilateral/Bilateral)
#This is the location of atelectasis for the total sample:
atelectasis_total_location <- data %>%
filter(atelectasis=="Yes") %>%
group_by(atelectasis_location) %>%
summarise(Freq = n()) %>%
mutate(Percentage = (round((Freq/sum(Freq)*100),digits=2)),
type_obesity = "Total"
)
atelectasis_total_location$sumpercent <- paste(atelectasis_total_location$Freq," (", atelectasis_total_location$Percentage, "%)")
atelectasis_total_location <- atelectasis_total_location %>%
select(-c(Freq,Percentage)) %>%
pivot_wider(names_from = atelectasis_location,values_from = sumpercent)
#This is the location by obesity class:
atelectasis_obesity_location <- data %>%
filter(atelectasis=="Yes") %>%
group_by(type_obesity, atelectasis_location) %>%
summarise(Freq = n()) %>%
mutate(Percentage = (round((Freq/sum(Freq)*100),digits=2)))
atelectasis_obesity_location$sumpercent <- paste(atelectasis_obesity_location$Freq," (", atelectasis_obesity_location$Percentage, "%)")
atelectasis_obesity_location <- atelectasis_obesity_location %>% select(-c(Freq,Percentage)) %>% pivot_wider(names_from = atelectasis_location,values_from = sumpercent)
# This is to bind both of the location results in a single table
location <- atelectasis_total_location %>%
bind_rows(atelectasis_obesity_location)
location## # A tibble: 4 × 3
## type_obesity Unilateral Bilateral
## <chr> <chr> <chr>
## 1 Total 53 ( 68.83 %) 24 ( 31.17 %)
## 2 Class 1 Obesity 7 ( 87.5 %) 1 ( 12.5 %)
## 3 Class 2 Obesity 10 ( 66.67 %) 5 ( 33.33 %)
## 4 Class 3 Obesity 36 ( 66.67 %) 18 ( 33.33 %)
The prevalence of atelectasis was greater in higher obesity classes: class 1, n=8 (12.9%, 95%CI:6.13 - 24.4); class 2, n=15 (28.3%, 95%CI:17.2 - 42.56); and class 3, n=54 (44.63%, 95%CI:35.68 - 53.92) (p<0.001).
Of those who had atelectasis, the most frequent presentation was unilateral n=53 ( 68.83 %), compared to bilateral n=24 ( 31.17 %). When examining this by obesity class, the observed distribution was not significantly different for those with class 1, 2, and 3 obesity categories (n=7 ( 87.5 %), n=10 ( 66.67 %), and n=36 ( 66.67 %), respectively) (p=0.484).
Assess distribution if assumed to be a numeric variable:
data %>%
mutate(atelectasis_percent = as.numeric(atelectasis_percent)) %>%
ggplot(aes(x = atelectasis_percent)) +
geom_histogram(colour = "black") +
facet_grid(type_obesity ~ .)Distribution excluding 0:
data %>%
mutate(atelectasis_percent = as.numeric(atelectasis_percent)) %>%
filter(atelectasis_percent >0) %>%
ggplot(aes(x = atelectasis_percent)) +
geom_histogram(colour = "black") +
facet_grid(type_obesity ~ .)The following would be the mean atelectasis percentage coverage if a normal distribution were assumed, which is what has been done in some prior studies:
## mean sd
## 1 2.658898 4.687145
And by obesity class:
data %>% group_by(type_obesity) %>%
summarize(
mean = mean(atelectasis_percent),
sd = sd(atelectasis_percent)
) ## # A tibble: 3 × 3
## type_obesity mean sd
## <fct> <dbl> <dbl>
## 1 Class 1 Obesity 0.927 2.91
## 2 Class 2 Obesity 1.56 3.15
## 3 Class 3 Obesity 4.03 5.52
As is evident from these numbers, assuming normality causes standard deviation to capture negative values, which is impossible in reality for this variable.
Thus, bootstrapping the mean and 95%CI is expected to lead to more appropriate estimates:
boot_atel <- one.boot(data$atelectasis_percent, mean, R=10000)
mean_boot <- mean(boot_atel$t)
mean_boot## [1] 2.66296
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 10000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_atel)
##
## Intervals :
## Level Normal Basic
## 95% ( 2.062, 3.247 ) ( 2.044, 3.231 )
##
## Level Percentile BCa
## 95% ( 2.087, 3.273 ) ( 2.087, 3.273 )
## Calculations and Intervals on Original Scale
The bias-corrected and accelerated (BCa) bootstrap interval is known to lead to more stable intervals with better coverage. Will report this. However, it is a good thing that here 95%CI through different methods do not lead to widely different results.
Now, I will calculate this for different BMI categories:
data_class_1 <- data %>% filter(type_obesity=="Class 1 Obesity")
data_class_2 <- data %>% filter(type_obesity=="Class 2 Obesity")
data_class_3 <- data %>% filter(type_obesity=="Class 3 Obesity") Class 1:
boot_class1 <- one.boot(data_class_1$atelectasis_percent, mean, R=10000)
mean_boot_class1 <- mean(boot_class1$t)
mean_boot_class1## [1] 0.9250605
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 10000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_class1)
##
## Intervals :
## Level Normal Basic
## 95% ( 0.2140, 1.6456 ) ( 0.1210, 1.5323 )
##
## Level Percentile BCa
## 95% ( 0.3226, 1.7339 ) ( 0.3226, 1.7339 )
## Calculations and Intervals on Original Scale
Class 2:
boot_class2 <- one.boot(data_class_2$atelectasis_percent, mean, R=10000)
mean_boot_class2 <- mean(boot_class2$t)
mean_boot_class2## [1] 1.553594
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 10000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_class2)
##
## Intervals :
## Level Normal Basic
## 95% ( 0.732, 2.388 ) ( 0.708, 2.311 )
##
## Level Percentile BCa
## 95% ( 0.802, 2.406 ) ( 0.802, 2.406 )
## Calculations and Intervals on Original Scale
Class 3:
boot_class3 <- one.boot(data_class_3$atelectasis_percent, mean, R=10000)
mean_boot_class3 <- mean(boot_class3$t)
mean_boot_class3## [1] 4.036407
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 10000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_class3)
##
## Intervals :
## Level Normal Basic
## 95% ( 3.036, 5.007 ) ( 3.017, 4.959 )
##
## Level Percentile BCa
## 95% ( 3.099, 5.041 ) ( 3.099, 5.041 )
## Calculations and Intervals on Original Scale
The mean atelectasis percentage coverage in the sample was 2.66% (95%CI:2.09-3.27) and according to obesity categories: class 1 (0.93%, 95%CI:0.32-1.73), class 2 (1.55%, 95%CI:0.8-2.41), and class 3 (4.04%, 95%CI:3.1-5.04).
Now, I will continue assessing atelectasis percentage if assumed to be categorical ordinal:
Mean expected frequency:
mean_exp <- data %>% mutate(atelectasis_percent=factor(atelectasis_percent)) %>% summarize(mean_expected_freq = n()/(nlevels(type_obesity)*nlevels(atelectasis_percent)))
mean_exp## mean_expected_freq
## 1 8.740741
Mean expected frequency is greater than 5.0, so chi-squared without continuity correction is adequate.
## type_obesity
## atelectasis_percent Class 1 Obesity Class 2 Obesity Class 3 Obesity
## 0 54 38 67
## 2.5 2 7 2
## 5 1 2 11
## 7.5 4 4 25
## 10 0 1 5
## 12.5 0 0 1
## 15 0 1 3
## 17.5 1 0 6
## 27.5 0 0 1
## type_obesity
## atelectasis_percent Class 1 Obesity Class 2 Obesity Class 3 Obesity
## 0 0.228813559 0.161016949 0.283898305
## 2.5 0.008474576 0.029661017 0.008474576
## 5 0.004237288 0.008474576 0.046610169
## 7.5 0.016949153 0.016949153 0.105932203
## 10 0.000000000 0.004237288 0.021186441
## 12.5 0.000000000 0.000000000 0.004237288
## 15 0.000000000 0.004237288 0.012711864
## 17.5 0.004237288 0.000000000 0.025423729
## 27.5 0.000000000 0.000000000 0.004237288
## type_obesity
## atelectasis_percent Class 1 Obesity Class 2 Obesity Class 3 Obesity
## 0 0.870967742 0.716981132 0.553719008
## 2.5 0.032258065 0.132075472 0.016528926
## 5 0.016129032 0.037735849 0.090909091
## 7.5 0.064516129 0.075471698 0.206611570
## 10 0.000000000 0.018867925 0.041322314
## 12.5 0.000000000 0.000000000 0.008264463
## 15 0.000000000 0.018867925 0.024793388
## 17.5 0.016129032 0.000000000 0.049586777
## 27.5 0.000000000 0.000000000 0.008264463
##
## Pearson's Chi-squared test
##
## data: tab
## X-squared = 39.434, df = 16, p-value = 0.0009412
barplot(prop_fig2a,beside=TRUE,ylim=c(0,1),ylab="Relative frequency",
col=brewer.pal(9,"Blues"),
legend.text=c("0%","2.5%","5%","7.5%","10%","12.5%","15%","17.5%","27.5%"),
space = c(0.2, 1.5)
)Scatterplot
plot(atelectasis_percent~BMI, data=data, main="Scatterplot", xlab="Body mass index (kg/m²)", ylab="Atelectasis percent (%)")
abline(lm(atelectasis_percent~BMI),col="darkblue")Atelectasis percent seems to increase as BMI increases. However, relationship is not linear.
Would a smooth term be more useful to model SpO2?
ggplot(data, aes(BMI,atelectasis_percent)) +
geom_point(size=0.6,color="gray40") +
geom_smooth(method="loess", color="darkblue") +
ylab("Atelectasis percent (%)") + xlab("Body mass index (kg/m²)") +
theme_bw() +
theme(panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.text.x = element_text(size=rel(1.2)),
axis.text.y = element_text(size=rel(1.2))
)What is the pattern when excluding participants with 0% atelectasis?
data %>% filter (atelectasis_percent>0) %>%
ggplot(aes(BMI,atelectasis_percent)) +
geom_point(size=0.6,color="gray40") +
geom_smooth(method="loess", color="darkblue") +
ylab("Atelectasis percent (%)") + xlab("Body mass index (kg/m²)") +
theme_bw() +
theme(panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.text.x = element_text(size=rel(1.2)),
axis.text.y = element_text(size=rel(1.2))
)GAM model with k=2:
##
## Family: gaussian
## Link function: identity
##
## Formula:
## atelectasis_percent ~ s(BMI, k = 2)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.6589 0.2248 11.83 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 1.963 1.999 97.75 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.457 Deviance explained = 46.2%
## GCV = 12.078 Scale est. = 11.926 n = 236
## k' edf k-index p-value
## s(BMI) 2 1.963441 0.9317886 0.14
GAM model with k=4:
##
## Family: gaussian
## Link function: identity
##
## Formula:
## atelectasis_percent ~ s(BMI, k = 4)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.6589 0.2141 12.42 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 2.937 2.997 81.98 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.508 Deviance explained = 51.4%
## GCV = 11.001 Scale est. = 10.817 n = 236
## k' edf k-index p-value
## s(BMI) 3 2.937474 1.038686 0.6825
GAM model with k=6:
##
## Family: gaussian
## Link function: identity
##
## Formula:
## atelectasis_percent ~ s(BMI, k = 6)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.659 0.211 12.6 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 4.572 4.91 52.93 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.522 Deviance explained = 53.1%
## GCV = 10.76 Scale est. = 10.506 n = 236
## k' edf k-index p-value
## s(BMI) 5 4.572447 1.077343 0.8925
GAM model with k=8:
##
## Family: gaussian
## Link function: identity
##
## Formula:
## atelectasis_percent ~ s(BMI, k = 8)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.6589 0.2115 12.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 4.764 5.67 44.97 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.519 Deviance explained = 52.9%
## GCV = 10.821 Scale est. = 10.557 n = 236
## k' edf k-index p-value
## s(BMI) 7 4.764266 1.073694 0.8725
GAM model with k=10:
##
## Family: gaussian
## Link function: identity
##
## Formula:
## atelectasis_percent ~ s(BMI, k = 10)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.6589 0.2114 12.58 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 4.92 5.982 42.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.52 Deviance explained = 53%
## GCV = 10.822 Scale est. = 10.55 n = 236
## k' edf k-index p-value
## s(BMI) 9 4.920424 1.075019 0.87
Best AIC:
## [[1]]
## [1] 1259.668
##
## [[2]]
## [1] 1237.596
##
## [[3]]
## [1] 1232.312
##
## [[4]]
## [1] 1233.637
##
## [[5]]
## [1] 1233.634
All models are significantly better than linear. Thus, using a smooth term for BMI to predict atelectasis percent is better than modelling a linear relationship.
Regarding AIC, greatest improvement in AIC is k=6. Will model with k=5 and k=7 to compare
GAM model with k=5:
##
## Family: gaussian
## Link function: identity
##
## Formula:
## atelectasis_percent ~ s(BMI, k = 5)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.6589 0.2129 12.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 3.712 3.951 63.63 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.513 Deviance explained = 52.1%
## GCV = 10.914 Scale est. = 10.696 n = 236
## k' edf k-index p-value
## s(BMI) 4 3.712457 1.059789 0.7925
GAM model with k=7:
##
## Family: gaussian
## Link function: identity
##
## Formula:
## atelectasis_percent ~ s(BMI, k = 7)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.6589 0.2113 12.58 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 4.747 5.454 47.01 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.52 Deviance explained = 53%
## GCV = 10.801 Scale est. = 10.538 n = 236
## k' edf k-index p-value
## s(BMI) 6 4.746815 1.075589 0.91
Best AIC:
## [[1]]
## [1] 1235.685
##
## [[2]]
## [1] 1232.312
##
## [[3]]
## [1] 1233.19
k=6 offers the lowest AIC. Will keep k=6 to model.
fig1a <- ggplot(data, aes(BMI,atelectasis_percent)) +
geom_point(size=0.6,color="gray60") +
geom_smooth(method="gam",formula = y ~ s(x, bs = "cs", k = 6), color="darkblue") +
labs(y="Atelectasis percent (%)",
x="Body mass index (kg/m²)",
tag="A") +
xlim(30,80) +
theme_bw() +
theme(panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.text.x = element_text(size=rel(1.2)),
axis.text.y = element_text(size=rel(1.2))
)
fig1aNegative monotonic relationship since SpO2 decreases as BMI increases.
Will assess Spearman’s correlation again only to have a rough idea (will not report this in the paper):
##
## Spearman's rank correlation rho
##
## data: spo2_VPO and atelectasis_percent
## S = 3883525, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.7727568
Atelectasis percent exhibited a negative non-linear monotonic relationship with SpO2 (Figure 1A, rho= -0.773, p<0.001).
Interestingly, this figure is almost a mirror image of the priorly created plot for SpO2 ~ BMI.
Assess distribution of age by atelectasis (yes/no):
data_age <- data %>% group_by(atelectasis)
ggplot(data_age,aes(x = age)) +
geom_histogram(fill = "lightsteelblue4", colour = "black") +
facet_grid(atelectasis ~ .)Distribution near-normal, will assess mean and variance for further testing:
mean_age <- data_age %>%
summarise(n=n(),
age_mean = mean(age),
sd = sd(age),
variance = var(age)
)
mean_age## # A tibble: 2 × 5
## atelectasis n age_mean sd variance
## <fct> <int> <dbl> <dbl> <dbl>
## 1 Yes 77 39.6 9.30 86.6
## 2 No 159 40.6 10.1 103.
Variances near-similar, but group sizes differ. Welch’s t-test more suitable:
##
## Welch Two Sample t-test
##
## data: age by atelectasis
## t = -0.67931, df = 162.72, p-value = 0.4979
## alternative hypothesis: true difference in means between group Yes and group No is not equal to 0
## 95 percent confidence interval:
## -3.532230 1.724013
## sample estimates:
## mean in group Yes mean in group No
## 39.64935 40.55346
Age was similarly distributed among patients without atelectasis (40.6, sd:10.1) and those with atelectasis (39.6, sd:9.3) (p=0.498).
Mean expected frequency:
mean_exp <- data %>% drop_na(sex, atelectasis) %>%
summarize(mean_expected_freq = n()/(nlevels(sex)*nlevels(atelectasis)))
mean_exp## mean_expected_freq
## 1 59
Mean expected frequency is greater than 5.0, so chi-squared without continuity correction is adequate.
## atelectasis
## sex Yes No
## Woman 67 147
## Man 10 12
## atelectasis
## sex Yes No
## Woman 0.2839 0.6229
## Man 0.0424 0.0508
## atelectasis
## sex Yes No
## Woman 31.31 68.69
## Man 45.45 54.55
Mosaic Plot
data %>%
mutate(atelectasis = fct_relevel(atelectasis, "No", "Yes")) %>%
ggplot() +
geom_mosaic(aes(x = product(atelectasis,sex), fill=atelectasis),na.rm = TRUE) +
scale_fill_manual(values=c("grey95","lightsteelblue4")) +
theme_mosaic() +
theme(axis.text.x=element_text(size=rel(0.8)))##
## Pearson's Chi-squared test
##
## data: freq
## X-squared = 1.8161, df = 1, p-value = 0.1778
There were no significant differences in atelectasis ocurrence between men (45.5%) and women (31.3%) (p=0.178).
Mean expected frequency:
mean_exp <- data %>%
drop_na(sleep_apnea, atelectasis) %>%
summarize(mean_expected_freq = n()/(nlevels(sleep_apnea)*nlevels(atelectasis)))
mean_exp## mean_expected_freq
## 1 59
Mean expected frequency is greater than 5.0, so chi-squared without continuity correction is adequate.
## atelectasis
## sleep_apnea Yes No
## No 60 158
## Yes 17 1
## atelectasis
## sleep_apnea Yes No
## No 0.2542 0.6695
## Yes 0.0720 0.0042
## atelectasis
## sleep_apnea Yes No
## No 27.52 72.48
## Yes 94.44 5.56
Mosaic Plot
data %>%
mutate(atelectasis = fct_relevel(atelectasis, "No", "Yes")) %>%
ggplot() +
geom_mosaic(aes(x = product(atelectasis,sleep_apnea), fill=atelectasis),na.rm = TRUE) +
scale_fill_manual(values=c("grey95","lightsteelblue4")) +
theme_mosaic() +
theme(axis.text.x=element_text(size=rel(0.8)))##
## Pearson's Chi-squared test
##
## data: freq
## X-squared = 33.875, df = 1, p-value = 5.876e-09
Patients with a diagnosis of obstructive sleep apnea had atelectasis more frequently (94.4%) than those without the diagnosis (27.5%) (p<0.001).
Mean expected frequency:
mean_exp <- data %>%
drop_na(sleep_apnea, atelectasis_location) %>%
summarize(mean_expected_freq = n()/(nlevels(sleep_apnea)*nlevels(atelectasis_location)))
mean_exp## mean_expected_freq
## 1 19.25
Mean expected frequency is greater than 5.0, so chi-squared without continuity correction is adequate.
## atelectasis_location
## sleep_apnea Unilateral Bilateral
## No 43 17
## Yes 10 7
## atelectasis_location
## sleep_apnea Unilateral Bilateral
## No 0.5584 0.2208
## Yes 0.1299 0.0909
## atelectasis_location
## sleep_apnea Unilateral Bilateral
## No 71.67 28.33
## Yes 58.82 41.18
Mosaic Plot
ggplot(data = data) +
geom_mosaic(aes(x = product(atelectasis_location,sleep_apnea),
fill=atelectasis_location),na.rm = TRUE) +
scale_fill_manual(values=c("seagreen1","seagreen4")) +
theme_mosaic() ##
## Pearson's Chi-squared test
##
## data: freq
## X-squared = 1.0185, df = 1, p-value = 0.3129
The location of atelectasis was not different among patients with and without OSA (p=0.313).
data_spo2 <- data %>% group_by(atelectasis)
median_spo2 <- data_spo2 %>% summarize(n = n(),
spo2_median = median(spo2_VPO),
Q1 = quantile(spo2_VPO,0.25),
Q3 = quantile(spo2_VPO,0.75),
min = min(spo2_VPO),
max = max(spo2_VPO)
)
median_spo2## # A tibble: 2 × 7
## atelectasis n spo2_median Q1 Q3 min max
## <fct> <int> <int> <dbl> <dbl> <int> <int>
## 1 Yes 77 92 91 93 88 97
## 2 No 159 97 96 98 89 99
Distribution not normal and influential outliers. Will assess non-parametrically.
##
## Wilcoxon rank sum test with continuity correction
##
## data: spo2_VPO by atelectasis
## W = 465.5, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
The median SpO2 was significantly lower in patients with atelectasis (92, IQR: 91-93) compared to those without (97, IQR: 96-98) (p<0.001).
data_spo2 <- data %>%
group_by(atelectasis_location) %>%
drop_na(atelectasis_location)
median_spo2 <- data_spo2 %>% summarize(n = n(),
spo2_median = median(spo2_VPO),
Q1 = quantile(spo2_VPO,0.25),
Q3 = quantile(spo2_VPO,0.75),
min = min(spo2_VPO),
max = max(spo2_VPO)
)
median_spo2## # A tibble: 2 × 7
## atelectasis_location n spo2_median Q1 Q3 min max
## <fct> <int> <dbl> <dbl> <dbl> <int> <int>
## 1 Unilateral 53 92 92 93 88 95
## 2 Bilateral 24 91.5 90 92 88 97
Distribution not normal and influential outliers. Will assess non-parametrically.
##
## Wilcoxon rank sum test with continuity correction
##
## data: spo2_VPO by atelectasis_location
## W = 879, p-value = 0.006227
## alternative hypothesis: true location shift is not equal to 0
The median SpO2 was significantly lower in patients with bilateral atelectasis (91.5, IQR: 90-92) compared to those with unilateral atelectasis (92, IQR: 92-93) (p=0.006).
Scatterplot
plot(spo2_VPO~atelectasis_percent, data=data,
main="Scatterplot",
xlab="Atelectasis percent (%)",
ylab="SpO2 (%)")Decreasing SpO2 as atelectasis percent increases.
Would a smooth term be more useful to model SpO2?
ggplot(data, aes(atelectasis_percent,spo2_VPO)) +
geom_point(size=0.6,color="gray40") +
geom_smooth(method="loess", color="black") +
ylab("SpO2 (%)") + xlab("Atelectasis percent (%)") +
ylim(85,100) +
theme_bw() +
theme(panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.text.x = element_text(size=rel(1.2)),
axis.text.y = element_text(size=rel(1.2))
)GAM model with k=2:
##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(atelectasis_percent, k = 2)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94.9958 0.1032 920.6 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(atelectasis_percent) 1.972 1.999 235.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.664 Deviance explained = 66.7%
## GCV = 2.5447 Scale est. = 2.5127 n = 236
## k' edf k-index p-value
## s(atelectasis_percent) 2 1.971542 0.9633485 0.265
GAM model with k=4:
##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(atelectasis_percent, k = 4)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94.9958 0.1023 928.5 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(atelectasis_percent) 2.522 2.833 168.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.67 Deviance explained = 67.3%
## GCV = 2.5076 Scale est. = 2.4702 n = 236
## k' edf k-index p-value
## s(atelectasis_percent) 3 2.522477 0.9820889 0.3675
GAM model with k=6:
##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(atelectasis_percent, k = 6)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94.9958 0.1014 936.4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(atelectasis_percent) 3.731 4.296 113.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.675 Deviance explained = 68%
## GCV = 2.4784 Scale est. = 2.4287 n = 236
## k' edf k-index p-value
## s(atelectasis_percent) 5 3.730877 1.000348 0.4325
GAM model with k=8:
##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(atelectasis_percent, k = 8)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94.9958 0.1014 936.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(atelectasis_percent) 3.895 4.634 105.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.675 Deviance explained = 68.1%
## GCV = 2.4784 Scale est. = 2.427 n = 236
## k' edf k-index p-value
## s(atelectasis_percent) 7 3.894793 1.001219 0.5175
Best AIC:
## [[1]]
## [1] 892.1322
##
## [[2]]
## [1] 888.6461
##
## [[3]]
## [1] 885.8384
##
## [[4]]
## [1] 885.8287
All models are significantly better than linear. Thus, using a smooth term for atelectasis percent is better than modelling a linear relationship.
Regarding AIC, no model offers greater improvement in AIC than k=6. Will try a model with k=5:
GAM model with k=5:
##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(atelectasis_percent, k = 5)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94.9958 0.1013 937.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(atelectasis_percent) 3.677 3.932 125.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.676 Deviance explained = 68.1%
## GCV = 2.4703 Scale est. = 2.4213 n = 236
## k' edf k-index p-value
## s(atelectasis_percent) 4 3.676746 1.002302 0.485
Best AIC:
## [[1]]
## [1] 888.6461
##
## [[2]]
## [1] 885.0655
##
## [[3]]
## [1] 885.8384
There is a drop in AIC for k=5, which also offers the best k-index. Nonetheless, one problem with this is that the extra knot is explaining a clump around 12.%, for which there was only one single observation. Thus, it is likely that this clump and additional knot is only explaining noise in the data, and would thus not be a good representation of the trend in the variable. Thus, will keep k=4 to model as this model offers the best visual representation of the trend in all categories.
fig1c <- ggplot(data, aes(atelectasis_percent,spo2_VPO)) +
geom_point(size=0.6, color="gray60",
position = position_jitter(seed = 1, width = .2)) +
geom_smooth(method="gam",formula = y ~ s(x, bs = "cs", k = 4), color="black") +
ylim(83,100) +
labs(y="SpO2 (%)",x="Atelectasis percent (%)",tag="C") +
theme_bw() +
theme(panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.text.x = element_text(size=rel(1.2)),
axis.text.y = element_text(size=rel(1.2))
)
fig1cNegative monotonic relationship since SpO2 decreases as BMI increases. Will assess Spearman’s correlation coefficient:
##
## Spearman's rank correlation rho
##
## data: spo2_VPO and atelectasis_percent
## S = 3883525, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.7727568
Atelectasis percent exhibited a negative non-linear monotonic relationship with SpO2 (Figure 1C, rho= -0.773, p<0.001).
Since there is only one participant in the 30% category, will collapse with the 20 category for further analyses:
datamodel <- data
datamodel$atelectasis_percent <- factor(datamodel$atelectasis_percent,
levels=c(0,2.5,5,7.5,10,12.5,15,17.5,27.5)) %>%
fct_collapse("17.5%" = c(17.5,27.5)) %>%
factor(labels = c("0%","2.5%","5%","7.5%","10%","12.5%","15%","17.5%"))
table(datamodel$atelectasis_percent)##
## 0% 2.5% 5% 7.5% 10% 12.5% 15% 17.5%
## 159 11 14 33 6 1 4 8
Assess distribution of SpO2 by atelectasis percent categories:
data_spo2 <- datamodel %>% group_by(atelectasis_percent)
ggplot(data_spo2,aes(x = spo2_VPO)) +
geom_histogram(fill = "aquamarine", colour = "black") +
facet_grid(atelectasis_percent ~ .)Distribution not normal, group sizes are different and there are outliers in both directions, depending where you are located. Thus, will proceed with non-parametric assessment:
median_spo2 <- data_spo2 %>%
summarize(n = n(),
spo2_median = median(spo2_VPO),
Q1 = quantile(spo2_VPO,0.25),
Q3 = quantile(spo2_VPO,0.75),
min = min(spo2_VPO),
max = max(spo2_VPO)
)
median_spo2## # A tibble: 8 × 7
## atelectasis_percent n spo2_median Q1 Q3 min max
## <fct> <int> <dbl> <dbl> <dbl> <int> <int>
## 1 0% 159 97 96 98 89 99
## 2 2.5% 11 94 94 94 93 95
## 3 5% 14 92 92 93 91 94
## 4 7.5% 33 92 92 93 88 97
## 5 10% 6 91 91 91 90 92
## 6 12.5% 1 92 92 92 92 92
## 7 15% 4 91.5 90.5 92 89 92
## 8 17.5% 8 88.5 88 89.2 88 92
##
## Kruskal-Wallis rank sum test
##
## data: spo2_VPO by atelectasis_percent
## Kruskal-Wallis chi-squared = 141.19, df = 7, p-value < 2.2e-16
There was a decreasing trend in median SpO2 with higher atelectasis percentage extension (p<0.001).
ggplot(datamodel, aes(x = atelectasis_percent, y = spo2_VPO)) +
geom_boxplot(width = .4, outlier.shape = NA, fill="aliceblue") +
geom_point(size = 1.5, alpha = .3,
position = position_jitter(seed = 1, width = .1)) +
ylab("SpO2 (%)") + xlab("Atelectasis percent") +
ylim(85,100) +
labs(tag="Kruskall-Wallis: p<0.001") +
theme_bw() +
theme(panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.ticks.length.x = unit(.1, "in"),
axis.text.x = element_text(size=rel(1.2)),
axis.text.y = element_text(size=rel(1.2)),
plot.tag.position = "top",
plot.tag = element_text(size=rel(0.9))
)datamodel %>% mutate(
type_obesity=fct_recode(type_obesity,
"1"="Class 1 Obesity",
"2"="Class 2 Obesity",
"3"="Class 3 Obesity")) %>%
count(type_obesity, sleep_apnea, atelectasis_percent) %>%
ggplot(aes(x = sleep_apnea, y = atelectasis_percent, color = sleep_apnea)) +
geom_point(aes(group = sleep_apnea, size = n)) +
scale_color_manual(values=c("gray40","darkred")) +
facet_wrap(~type_obesity, scales = "free_x",
labeller = labeller(type_obesity = label_both)) +
scale_size(breaks = c(1, 10, 20, 30, 40, 50, 60)) +
theme_light()Sleep apnea was more common with higher BMI categories and also with higher atelectasis percentage. Atelectasis percent increases at higher obesity classes.
Plot DAG model to visualizes again:
This procedure was performed as suggested in this article.
## age _||_ alt_
## age _||_ sex
## alt_ _||_ sex
## alt_ _||_ typ_ | age, sex, slp_
## hb _||_ slp_ | age, alt_, atl_, sex
## hb _||_ typ_ | age, alt_, atl_, sex
Test conditional independencies:
subsetcondit <- datamodel %>% select(c("age",
"sex",
"type_obesity",
"spo2_VPO",
"hb",
"sleep_apnea",
"atelectasis_percent",
"altitude_cat")
)
subsetcondit <- subsetcondit %>%
mutate(sex = as.numeric(sex),
sleep_apnea = as.numeric(sleep_apnea)
)
corr <- lavCor(subsetcondit,
ordered=c("sex",
"type_obesity",
"sleep_apnea",
"atelectasis_percent",
"altitude_cat",
"spo2_VPO")
)
corr## age sex typ_bs s2_VPO hb slp_pn atlct_ alttd_
## age 1.000
## sex 0.022 1.000
## type_obesity -0.142 0.276 1.000
## spo2_VPO 0.014 -0.102 -0.344 1.000
## hb 0.071 0.403 0.052 -0.045 1.000
## sleep_apnea 0.064 0.645 0.286 -0.622 0.219 1.000
## atelectasis_percent -0.071 0.180 0.460 -0.916 0.069 0.617 1.000
## altitude_cat 0.014 -0.119 -0.054 -0.063 -0.027 0.193 0.013 1.000
## estimate p.value 2.5%
## age _||_ alt_ 0.01389636 0.83200380 -0.1140077
## age _||_ sex 0.02162883 0.74124789 -0.1063663
## alt_ _||_ sex -0.11910345 0.06774401 -0.2431642
## alt_ _||_ typ_ | age, sex, slp_ -0.08294080 0.20739686 -0.2092591
## hb _||_ slp_ | age, alt_, atl_, sex -0.10913579 0.09729158 -0.2346846
## hb _||_ typ_ | age, alt_, atl_, sex -0.06364414 0.33483830 -0.1908952
## 97.5%
## age _||_ alt_ 0.141349797
## age _||_ sex 0.148922919
## alt_ _||_ sex 0.008729808
## alt_ _||_ typ_ | age, sex, slp_ 0.046071800
## hb _||_ slp_ | age, alt_, atl_, sex 0.019943221
## hb _||_ typ_ | age, alt_, atl_, sex 0.065693116
Conditional independence assumption OK. Minimal set of adjustment is age, sex, and sleep_apnea.
This paper and accompanying code were used to calculate prevalence ratios.
A modified Poisson regression model with robust errors will be applied to obtain prevalence ratios.
dataprev <- data %>%
mutate(atelectasis = as.numeric(
as.character(
fct_recode(atelectasis,
"0"="No",
"1"="Yes"))))
poisson_fit <- glm(atelectasis ~ type_obesity,
data = dataprev,
family = poisson(link = log)
)
tidy(poisson_fit, exponentiate = TRUE, conf.int = TRUE)## # A tibble: 3 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.129 0.353 -5.79 6.92e-9 0.0590 0.240
## 2 type_obesityClass 2 O… 2.19 0.438 1.79 7.28e-2 0.953 5.45
## 3 type_obesityClass 3 O… 3.46 0.379 3.28 1.05e-3 1.75 7.87
Robust standard errors:
## (Intercept) type_obesityClass 2 Obesity
## (Intercept) 0.108871 -0.1088710
## type_obesityClass 2 Obesity -0.108871 0.1566697
## type_obesityClass 3 Obesity -0.108871 0.1088710
## type_obesityClass 3 Obesity
## (Intercept) -0.108871
## type_obesityClass 2 Obesity 0.108871
## type_obesityClass 3 Obesity 0.119125
#Calculate the standard error
se <- sqrt(diag(covmat))
# Bind together model output
# 1. exponentiated coefficients
# 2. robust standard errors
# 3. 95% confidence intervals
# Note that qnorm(0.975) approximately equals 1.96
model_output <- cbind(
Estimate = exp(coef(poisson_fit)),
`Robust SE` = se,
Lower = exp(coef(poisson_fit) - qnorm(0.975) * se),
Upper = exp(coef(poisson_fit) + qnorm(0.975) * se)
)
# Coerce model_output into a data frame
model_output <- as.data.frame(round(model_output,2))
model_output## Estimate Robust SE Lower Upper
## (Intercept) 0.13 0.33 0.07 0.25
## type_obesityClass 2 Obesity 2.19 0.40 1.01 4.76
## type_obesityClass 3 Obesity 3.46 0.35 1.76 6.80
poisson_fit <- glm(atelectasis ~ type_obesity + age + sex + sleep_apnea,
data = dataprev,
family = poisson(link = log))
tidy(poisson_fit, exponentiate = TRUE, conf.int = TRUE)## # A tibble: 6 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.140 0.615 -3.20 1.38e-3 0.0398 0.447
## 2 type_obesityClass 2 O… 2.13 0.439 1.72 8.47e-2 0.924 5.30
## 3 type_obesityClass 3 O… 3.10 0.384 2.95 3.21e-3 1.55 7.12
## 4 age 0.997 0.0121 -0.283 7.78e-1 0.973 1.02
## 5 sexMan 0.722 0.380 -0.859 3.90e-1 0.326 1.46
## 6 sleep_apneaYes 3.33 0.307 3.92 8.85e-5 1.77 5.92
Robust standard errors:
## (Intercept) type_obesityClass 2 Obesity
## (Intercept) 0.217765534 -9.742767e-02
## type_obesityClass 2 Obesity -0.097427669 1.378078e-01
## type_obesityClass 3 Obesity -0.097289004 9.657037e-02
## age -0.003104120 7.319695e-05
## sexMan 0.007684832 1.033022e-02
## sleep_apneaYes 0.003376050 -1.234191e-02
## type_obesityClass 3 Obesity age
## (Intercept) -9.728900e-02 -3.104120e-03
## type_obesityClass 2 Obesity 9.657037e-02 7.319695e-05
## type_obesityClass 3 Obesity 1.078039e-01 8.049292e-05
## age 8.049292e-05 7.871795e-05
## sexMan -3.306396e-03 -2.475432e-04
## sleep_apneaYes -1.090238e-02 -9.687233e-05
## sexMan sleep_apneaYes
## (Intercept) 0.0076848325 3.376050e-03
## type_obesityClass 2 Obesity 0.0103302170 -1.234191e-02
## type_obesityClass 3 Obesity -0.0033063959 -1.090238e-02
## age -0.0002475432 -9.687233e-05
## sexMan 0.0591480165 -1.874945e-02
## sleep_apneaYes -0.0187494458 2.613647e-02
#Calculate the standard error
se <- sqrt(diag(covmat))
# Bind together model output
# 1. exponentiated coefficients
# 2. robust standard errors
# 3. 95% confidence intervals
# Note that qnorm(0.975) approximately equals 1.96
model_output2 <- cbind(
Estimate = exp(coef(poisson_fit)),
`Robust SE` = se,
Lower = exp(coef(poisson_fit) - qnorm(0.975) * se),
Upper = exp(coef(poisson_fit) + qnorm(0.975) * se)
)
# Coerce model_output into a data frame
model_output2 <- as.data.frame(round(model_output2,2))
model_output2## Estimate Robust SE Lower Upper
## (Intercept) 0.14 0.47 0.06 0.35
## type_obesityClass 2 Obesity 2.13 0.37 1.03 4.41
## type_obesityClass 3 Obesity 3.10 0.33 1.63 5.90
## age 1.00 0.01 0.98 1.01
## sexMan 0.72 0.24 0.45 1.16
## sleep_apneaYes 3.33 0.16 2.43 4.57
model_output <- model_output %>%
dplyr::slice(2:3) %>%
rename(PR = Estimate, SE = `Robust SE`) %>%
mutate("95%CI" = paste(Lower,Upper,sep="-")) %>%
select(-c(Lower,Upper))
model_output2 <- model_output2 %>%
dplyr::slice(2:3) %>%
rename(aPR = Estimate, aSE = `Robust SE`) %>%
mutate("a95%CI" = paste(Lower,Upper,sep="-")) %>%
select(-c(Lower,Upper))
table2 <- dplyr::bind_cols(model_output, model_output2) %>%
rownames_to_column(var = "Category") %>%
mutate_at("Category", str_replace, "type_obesity", "")
flextable(table2) %>%
save_as_docx(path = paste(tabfolder,"/Table2.docx",sep=""))
table2## Category PR SE 95%CI aPR aSE a95%CI
## 1 Class 2 Obesity 2.19 0.40 1.01-4.76 2.13 0.37 1.03-4.41
## 2 Class 3 Obesity 3.46 0.35 1.76-6.8 3.10 0.33 1.63-5.9
Some code available in the following resource
was used for these analyses:
- Dunn, Taylor. 2020. “Ordinal Regression in R: Part 1.” March 15,
2020.
Rename type obesity levels to facilitate reading of results:
datamodel <- datamodel %>%
mutate(type_obesity=fct_recode(type_obesity,
"1"="Class 1 Obesity",
"2"="Class 2 Obesity",
"3"="Class 3 Obesity"),
atelectasis_percent=ordered(atelectasis_percent)
) Visualize pattern of atelectasis percent increase by obesity type category.
datamodel %>%
ggplot(aes(x = type_obesity, fill = atelectasis_percent)) +
geom_bar() +
scale_fill_manual(values = brewer.pal(8,"Blues")) +
theme_minimal()The loglog link function will be used for all models since this distribution provides the best fit for the fully adjusted model:
datamodel %>%
nest(data = everything()) %>%
crossing(
link = c("logit", "probit", "cloglog", "loglog", "cauchit"),
threshold = c("flexible", "equidistant", "symmetric", "symmetric2")
) %>%
mutate(
mod = map2(
data, link,
~clm(atelectasis_percent ~ type_obesity + sex + age + sleep_apnea,
data = .x, link = .y
)
)
) %>%
mutate(
mod_summary = map(
mod,
~glance(.x) %>% mutate(logLik = as.numeric(logLik))
)
) %>%
unnest(mod_summary) %>%
dplyr::select(link, threshold, logLik, edf, AIC, BIC) %>%
arrange(logLik) %>%
gt()| link | threshold | logLik | edf | AIC | BIC |
|---|---|---|---|---|---|
| cauchit | equidistant | -254.5032 | 12 | 533.0064 | 574.5724 |
| cauchit | flexible | -254.5032 | 12 | 533.0064 | 574.5724 |
| cauchit | symmetric | -254.5032 | 12 | 533.0064 | 574.5724 |
| cauchit | symmetric2 | -254.5032 | 12 | 533.0064 | 574.5724 |
| cloglog | equidistant | -252.1230 | 12 | 528.2459 | 569.8119 |
| cloglog | flexible | -252.1230 | 12 | 528.2459 | 569.8119 |
| cloglog | symmetric | -252.1230 | 12 | 528.2459 | 569.8119 |
| cloglog | symmetric2 | -252.1230 | 12 | 528.2459 | 569.8119 |
| probit | equidistant | -248.6292 | 12 | 521.2584 | 562.8244 |
| probit | flexible | -248.6292 | 12 | 521.2584 | 562.8244 |
| probit | symmetric | -248.6292 | 12 | 521.2584 | 562.8244 |
| probit | symmetric2 | -248.6292 | 12 | 521.2584 | 562.8244 |
| logit | equidistant | -248.3580 | 12 | 520.7161 | 562.2820 |
| logit | flexible | -248.3580 | 12 | 520.7161 | 562.2820 |
| logit | symmetric | -248.3580 | 12 | 520.7161 | 562.2820 |
| logit | symmetric2 | -248.3580 | 12 | 520.7161 | 562.2820 |
| loglog | equidistant | -247.2876 | 12 | 518.5752 | 560.1412 |
| loglog | flexible | -247.2876 | 12 | 518.5752 | 560.1412 |
| loglog | symmetric | -247.2876 | 12 | 518.5752 | 560.1412 |
| loglog | symmetric2 | -247.2876 | 12 | 518.5752 | 560.1412 |
Obesity class:
fit_BMI <- clm(atelectasis_percent~type_obesity,
data = datamodel,
link = "loglog", threshold = "flexible")
summary(fit_BMI)## formula: atelectasis_percent ~ type_obesity
## data: datamodel
##
## link threshold nobs logLik AIC niter max.grad cond.H
## loglog flexible 236 -260.47 538.94 9(2) 8.12e-09 9.3e+02
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## type_obesity2 0.8277 0.4379 1.890 0.0587 .
## type_obesity3 1.4929 0.3793 3.936 8.28e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## 0%|2.5% 1.9847 0.3536 5.613
## 2.5%|5% 2.1794 0.3578 6.091
## 5%|7.5% 2.4687 0.3648 6.767
## 7.5%|10% 3.5817 0.4105 8.725
## 10%|12.5% 3.9794 0.4395 9.054
## 12.5%|15% 4.0624 0.4468 9.092
## 15%|17.5% 4.4794 0.4914 9.115
## Tests of nominal effects
##
## formula: atelectasis_percent ~ type_obesity
## Df logLik AIC LRT Pr(>Chi)
## <none> -260.47 538.94
## type_obesity
## Tests of scale effects
##
## formula: atelectasis_percent ~ type_obesity
## Df logLik AIC LRT Pr(>Chi)
## <none> -260.47 538.94
## type_obesity 2 -257.80 537.60 5.3462 0.06904 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Output not showing result of nominal test. When running traceback, this seems to be a problem of failure to converge of the nominal test function. However, this does not constitute a problem in the model convergence itself as shown bellow:
## nobs logLik niter max.grad cond.H logLik.Error
## 236 -260.47 9(2) 8.12e-09 9.3e+02 <1e-10
##
## Estimate Std.Err Gradient Error Cor.Dec Sig.Dig
## 0%|2.5% 1.9847 0.3536 -8.12e-09 -1.07e-09 8 9
## 2.5%|5% 2.1794 0.3578 -1.83e-10 -1.07e-09 8 9
## 5%|7.5% 2.4687 0.3648 -2.22e-10 -1.07e-09 8 9
## 7.5%|10% 3.5817 0.4105 -6.20e-11 -1.07e-09 8 9
## 10%|12.5% 3.9794 0.4395 -2.98e-13 -1.07e-09 8 9
## 12.5%|15% 4.0624 0.4468 2.38e-13 -1.07e-09 8 9
## 15%|17.5% 4.4794 0.4914 -6.06e-12 -1.07e-09 8 9
## type_obesity2 0.8277 0.4379 2.55e-15 -1.07e-09 8 8
## type_obesity3 1.4929 0.3793 1.33e-14 -1.07e-09 8 9
##
## Eigen values of Hessian:
## 732.0787 324.4480 249.4324 81.8157 66.7963 32.3935 19.1533 7.2541 0.7886
##
## Convergence message from clm:
## (0) successful convergence
## In addition: Absolute and relative convergence criteria were met
This problem has been signalled in post1 and post2, without an available solution yet within the package. However, running a model with a nominal term for the explanatory variable and comparing them trough ANOVA is an alternative way of testing the proportional odds assumption:
fit_BMInom <- clm(atelectasis_percent ~ 1, nominal= ~ type_obesity,
data = datamodel,
link = "loglog", threshold = "flexible")## Warning: (-1) Model failed to converge with max|grad| = 0.0490995 (tol = 1e-06)
## In addition: maximum number of consecutive Newton modifications reached
## formula: atelectasis_percent ~ 1
## nominal: ~type_obesity
## data: datamodel
##
## link threshold nobs logLik AIC niter max.grad cond.H
## loglog flexible 236 -240.69 523.38 12(3) 4.91e-02 -8.4e+16
##
## Threshold coefficients:
## Estimate Std. Error z value
## 0%|2.5%.(Intercept) 2.0811 NA NA
## 2.5%|5%.(Intercept) 2.4260 NA NA
## 5%|7.5%.(Intercept) 2.6502 NA NA
## 7.5%|10%.(Intercept) 6.0240 NA NA
## 10%|12.5%.(Intercept) 28.9503 NA NA
## 12.5%|15%.(Intercept) -13.1247 NA NA
## 15%|17.5%.(Intercept) -2.6936 NA NA
## 0%|2.5%.type_obesity2 -0.9203 NA NA
## 2.5%|5%.type_obesity2 -0.4894 NA NA
## 5%|7.5%.type_obesity2 -0.3551 NA NA
## 7.5%|10%.type_obesity2 -2.0707 NA NA
## 10%|12.5%.type_obesity2 -21.7026 NA NA
## 12.5%|15%.type_obesity2 11.2433 NA NA
## 15%|17.5%.type_obesity2 6.8281 NA NA
## 0%|2.5%.type_obesity3 -1.5540 NA NA
## 2.5%|5%.type_obesity3 -1.8479 NA NA
## 5%|7.5%.type_obesity3 -1.7660 NA NA
## 7.5%|10%.type_obesity3 -4.0662 NA NA
## 10%|12.5%.type_obesity3 -26.5944 NA NA
## 12.5%|15%.type_obesity3 15.5804 NA NA
## 15%|17.5%.type_obesity3 5.5194 NA NA
Nominal model failed to converge, likely due to many new subgroups needed to create a nominal model, many of which may have been empty.
One additional problem that may have led to the inability to test the proportional odds assumption is that there is one category with only one observation:
##
## 0% 2.5% 5% 7.5% 10% 12.5% 15% 17.5%
## 159 11 14 33 6 1 4 8
It is known that having few observations per category does not affect the results of ordinal regression, and that some categories may need to be combined to assess proportional odds assumption. REF
Will collapse categories to test:
data_prop <- datamodel
data_prop$atelectasis_percent <- fct_collapse(data_prop$atelectasis_percent,
"0%" = c("0%","2.5%"),
"5%" = c("5%","7.5%"),
"10%" = c("10%","12.5%"),
"15%" = c("15%","17.5%")
)
table(data_prop$atelectasis_percent)##
## 0% 5% 10% 15%
## 170 47 7 12
fit_BMI2 <- clm(atelectasis_percent~type_obesity,
data = data_prop,
link = "loglog", threshold = "flexible")
summary(fit_BMI2)## formula: atelectasis_percent ~ type_obesity
## data: data_prop
##
## link threshold nobs logLik AIC niter max.grad cond.H
## loglog flexible 236 -176.83 363.66 8(0) 8.87e-10 1.1e+02
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## type_obesity2 0.4770 0.5402 0.883 0.377
## type_obesity3 1.7145 0.4317 3.972 7.14e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## 0%|5% 2.2884 0.4083 5.604
## 5%|10% 3.7049 0.4565 8.116
## 10%|15% 4.1871 0.4895 8.554
## Tests of nominal effects
##
## formula: atelectasis_percent ~ type_obesity
## Df logLik AIC LRT Pr(>Chi)
## <none> -176.83 363.66
## type_obesity
## Tests of scale effects
##
## formula: atelectasis_percent ~ type_obesity
## Df logLik AIC LRT Pr(>Chi)
## <none> -176.83 363.66
## type_obesity 2 -176.78 367.56 0.10419 0.9492
fit_BMInom2 <- clm(atelectasis_percent ~ 1, nominal= ~ type_obesity,
data = data_prop,
link = "loglog", threshold = "flexible")## Warning: (-3) not all thresholds are increasing: fit is invalid
## In addition: Absolute convergence criterion was met, but relative criterion was not met
## formula: atelectasis_percent ~ 1
## nominal: ~type_obesity
## data: data_prop
##
## link threshold nobs logLik AIC niter max.grad cond.H
## loglog flexible 236 -171.03 360.05 21(0) 6.23e-09 1.3e+11
##
## Threshold coefficients:
## Estimate Std. Error z value
## 0%|5%.(Intercept) 2.4590 0.4473 5.497
## 5%|10%.(Intercept) 23.0042 12665.6882 0.002
## 10%|15%.(Intercept) -3.2539 16494.5757 0.000
## 0%|5%.type_obesity2 -0.6488 0.5704 -1.137
## 5%|10%.type_obesity2 -19.7462 12665.6882 -0.002
## 10%|15%.type_obesity2 7.2146 16494.5758 0.000
## 0%|5%.type_obesity3 -1.8822 0.4689 -4.014
## 5%|10%.type_obesity3 -21.0511 12665.6882 -0.002
## 10%|15%.type_obesity3 5.7042 16494.5757 0.000
## Likelihood ratio tests of cumulative link models:
##
## formula: nominal: link: threshold:
## fit_BMI2 atelectasis_percent ~ type_obesity ~1 loglog flexible
## fit_BMInom2 atelectasis_percent ~ 1 ~type_obesity loglog flexible
##
## no.par AIC logLik LR.stat df Pr(>Chisq)
## fit_BMI2 5 363.66 -176.83
## fit_BMInom2 9 360.05 -171.03 11.608 4 0.02052 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The model with nominal effects has a marginally lower AIC (360.05) than the ordinal model (363.66) p=0.02. Proportional odds assumption not met. However, nominal model had problem with convergence (unstable estimates).
One alternative would be to fully model through a multinomial logistic regression model. As already shown, nominal models have failed to converge and have reduced statistical power.
Other alternatives would be to “ignore non-proportional odds, knowing that the odds ratio may still be very meaningful, or fit a partial proportional odds model or constrained partial PO model using either Bayesian modeling with blrm or using a frequentist procedure with the VGAM package vglm function” REF1 plus REF2.
Despite not meeting proportional odds assumption, I will present results of univariable and multivariable ordinal regression models to present first-order effects since this model is parsimonious and easier to understand. I will then try to create a partial PO model to further characterize and understand exposure-effect relationships.
Age
fit_age <- clm(atelectasis_percent~age,
data = datamodel,
link = "loglog", threshold = "flexible")
summary(fit_age)## formula: atelectasis_percent ~ age
## data: datamodel
##
## link threshold nobs logLik AIC niter max.grad cond.H
## loglog flexible 236 -271.61 559.21 9(2) 1.63e-13 2.2e+05
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## age -0.00832 0.01148 -0.725 0.469
##
## Threshold coefficients:
## Estimate Std. Error z value
## 0%|2.5% 0.5956 0.4694 1.269
## 2.5%|5% 0.7814 0.4715 1.657
## 5%|7.5% 1.0578 0.4753 2.226
## 7.5%|10% 2.1455 0.5083 4.221
## 10%|12.5% 2.5384 0.5318 4.773
## 12.5%|15% 2.6206 0.5378 4.873
## 15%|17.5% 3.0351 0.5751 5.278
## Tests of nominal effects
##
## formula: atelectasis_percent ~ age
## Df logLik AIC LRT Pr(>Chi)
## <none> -271.61 559.21
## age
## Tests of scale effects
##
## formula: atelectasis_percent ~ age
## Df logLik AIC LRT Pr(>Chi)
## <none> -271.61 559.21
## age 1 -271.32 560.65 0.56311 0.453
Proportional odds assumption not showing either. Perhaps this is a problem due to centering. Will center and re-assess.
fit_age2 <- clm(atelectasis_percent~agec,
data = datamodel,
link = "loglog", threshold = "flexible")
summary(fit_age2)## formula: atelectasis_percent ~ agec
## data: datamodel
##
## link threshold nobs logLik AIC niter max.grad cond.H
## loglog flexible 236 -271.61 559.21 8(2) 2.87e-08 1.5e+03
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## agec -0.3350 0.4621 -0.725 0.469
##
## Threshold coefficients:
## Estimate Std. Error z value
## 0%|2.5% 0.5956 0.4694 1.269
## 2.5%|5% 0.7814 0.4715 1.657
## 5%|7.5% 1.0578 0.4753 2.226
## 7.5%|10% 2.1455 0.5083 4.221
## 10%|12.5% 2.5384 0.5318 4.773
## 12.5%|15% 2.6206 0.5378 4.873
## 15%|17.5% 3.0351 0.5751 5.278
## Tests of nominal effects
##
## formula: atelectasis_percent ~ agec
## Df logLik AIC LRT Pr(>Chi)
## <none> -271.61 559.21
## agec
## Tests of scale effects
##
## formula: atelectasis_percent ~ agec
## Df logLik AIC LRT Pr(>Chi)
## <none> -271.61 559.21
## agec 1 -271.32 560.65 0.56311 0.453
Still not shown. Nonetheless, proportional odds assumption is not quite as relevant for covariates. Will assess remaining variables to decide best approach.
uni_age <- tidy(fit_age, conf.int = T, conf.type = "Wald") %>%
transmute(
term, across(c(estimate, conf.low, conf.high), exp)
) %>%
gt() %>%
fmt_number(c(estimate, conf.low, conf.high), decimals = 2) %>%
cols_merge(c(conf.low, conf.high),
pattern = "{1}—{2}") %>%
cols_label(conf.low = "95%CI")
uni_age| term | estimate | 95%CI |
|---|---|---|
| 0%|2.5% | 1.81 | 0.72—4.55 |
| 2.5%|5% | 2.18 | 0.87—5.50 |
| 5%|7.5% | 2.88 | 1.13—7.31 |
| 7.5%|10% | 8.55 | 3.16—23.15 |
| 10%|12.5% | 12.66 | 4.46—35.90 |
| 12.5%|15% | 13.74 | 4.79—39.44 |
| 15%|17.5% | 20.80 | 6.74—64.21 |
| age | 0.99 | 0.97—1.01 |
Sex
fit_sex <- clm(atelectasis_percent~sex,
data = datamodel,
link = "loglog", threshold = "flexible")
summary(fit_sex)## formula: atelectasis_percent ~ sex
## data: datamodel
##
## link threshold nobs logLik AIC niter max.grad cond.H
## loglog flexible 236 -270.89 557.78 8(2) 2.12e-08 2.5e+02
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## sexMan 0.5058 0.3396 1.489 0.136
##
## Threshold coefficients:
## Estimate Std. Error z value
## 0%|2.5% 0.9816 0.1228 7.994
## 2.5%|5% 1.1679 0.1314 8.889
## 5%|7.5% 1.4453 0.1463 9.879
## 7.5%|10% 2.5366 0.2346 10.810
## 10%|12.5% 2.9300 0.2817 10.400
## 12.5%|15% 3.0122 0.2929 10.285
## 15%|17.5% 3.4264 0.3570 9.598
## Tests of nominal effects
##
## formula: atelectasis_percent ~ sex
## Df logLik AIC LRT Pr(>Chi)
## <none> -270.89 557.78
## sex
## Tests of scale effects
##
## formula: atelectasis_percent ~ sex
## Df logLik AIC LRT Pr(>Chi)
## <none> -270.89 557.78
## sex 1 -270.88 559.77 0.0099172 0.9207
Does not hold for sex either.
uni_sex <- tidy(fit_sex, conf.int = T, conf.type = "Wald") %>%
transmute(
term, across(c(estimate, conf.low, conf.high), exp)
) %>%
gt() %>%
fmt_number(c(estimate, conf.low, conf.high), decimals = 2) %>%
cols_merge(c(conf.low, conf.high),
pattern = "{1}—{2}") %>%
cols_label(conf.low = "95%CI")
uni_sex| term | estimate | 95%CI |
|---|---|---|
| 0%|2.5% | 2.67 | 2.10—3.39 |
| 2.5%|5% | 3.22 | 2.49—4.16 |
| 5%|7.5% | 4.24 | 3.19—5.65 |
| 7.5%|10% | 12.64 | 7.98—20.02 |
| 10%|12.5% | 18.73 | 10.78—32.53 |
| 12.5%|15% | 20.33 | 11.45—36.10 |
| 15%|17.5% | 30.77 | 15.28—61.93 |
| sexMan | 1.66 | 0.85—3.23 |
Sleep Apnea
fit_OSA <- clm(atelectasis_percent~sleep_apnea,
data = datamodel, link = "loglog",
threshold = "flexible")
summary(fit_OSA)## formula: atelectasis_percent ~ sleep_apnea
## data: datamodel
##
## link threshold nobs logLik AIC niter max.grad cond.H
## loglog flexible 236 -256.47 528.94 8(2) 2.23e-07 2.2e+02
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## sleep_apneaYes 1.8657 0.2831 6.59 4.39e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## 0%|2.5% 1.1260 0.1296 8.689
## 2.5%|5% 1.3381 0.1407 9.507
## 5%|7.5% 1.6418 0.1577 10.412
## 7.5%|10% 2.7962 0.2471 11.316
## 10%|12.5% 3.1992 0.2929 10.923
## 12.5%|15% 3.2829 0.3037 10.809
## 15%|17.5% 3.7033 0.3663 10.109
## Tests of nominal effects
##
## formula: atelectasis_percent ~ sleep_apnea
## Df logLik AIC LRT Pr(>Chi)
## <none> -256.47 528.94
## sleep_apnea
## Tests of scale effects
##
## formula: atelectasis_percent ~ sleep_apnea
## Df logLik AIC LRT Pr(>Chi)
## <none> -256.47 528.94
## sleep_apnea 1 -255.84 529.67 1.2653 0.2606
Does not hold for OSA either.
uni_OSA <- tidy(fit_OSA, conf.int = T, conf.type = "Wald") %>%
transmute(
term, across(c(estimate, conf.low, conf.high), exp)
) %>%
gt() %>%
fmt_number(c(estimate, conf.low, conf.high), decimals = 2) %>%
cols_merge(c(conf.low, conf.high),
pattern = "{1}—{2}") %>%
cols_label(conf.low = "95%CI")
uni_OSA| term | estimate | 95%CI |
|---|---|---|
| 0%|2.5% | 3.08 | 2.39—3.97 |
| 2.5%|5% | 3.81 | 2.89—5.02 |
| 5%|7.5% | 5.16 | 3.79—7.03 |
| 7.5%|10% | 16.38 | 10.09—26.59 |
| 10%|12.5% | 24.51 | 13.81—43.52 |
| 12.5%|15% | 26.65 | 14.70—48.34 |
| 15%|17.5% | 40.58 | 19.79—83.20 |
| sleep_apneaYes | 6.46 | 3.71—11.25 |
Since proportional odds not met for prior model with multiple ordered categories, I will use the priorly collapsed categories to model since AIC was also much lower when including collapsed factors. Perhaps this solves problems in prior models?
Type obesity (ordinal model created before)
uni_BMI <- tidy(fit_BMI2, conf.int = T, conf.type = "Wald") %>%
transmute(
term, across(c(estimate, conf.low, conf.high), exp)
) %>%
gt() %>%
fmt_number(c(estimate, conf.low, conf.high), decimals = 2) %>%
cols_merge(c(conf.low, conf.high),
pattern = "{1}—{2}") %>%
cols_label(conf.low = "95%CI")
uni_BMI| term | estimate | 95%CI |
|---|---|---|
| 0%|5% | 9.86 | 4.43—21.95 |
| 5%|10% | 40.64 | 16.61—99.44 |
| 10%|15% | 65.83 | 25.22—171.82 |
| type_obesity2 | 1.61 | 0.56—4.65 |
| type_obesity3 | 5.55 | 2.38—12.94 |
Age
fit_age <- clm(atelectasis_percent~age,
data = data_prop,
link = "loglog", threshold = "flexible")
summary(fit_age)## formula: atelectasis_percent ~ age
## data: data_prop
##
## link threshold nobs logLik AIC niter max.grad cond.H
## loglog flexible 236 -191.67 391.33 8(0) 1.69e-08 9.0e+04
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## age -0.009863 0.012461 -0.792 0.429
##
## Threshold coefficients:
## Estimate Std. Error z value
## 0%|5% 0.7210 0.5067 1.423
## 5%|10% 2.0847 0.5415 3.850
## 10%|15% 2.5600 0.5691 4.498
## Tests of nominal effects
##
## formula: atelectasis_percent ~ age
## Df logLik AIC LRT Pr(>Chi)
## <none> -191.67 391.33
## age
## Tests of scale effects
##
## formula: atelectasis_percent ~ age
## Df logLik AIC LRT Pr(>Chi)
## <none> -191.67 391.33
## age 1 -191.64 393.28 0.052727 0.8184
Proportional odds assumption not showing either. Will try centered variable.
fit_age2 <- clm(atelectasis_percent~agec,
data = data_prop,
link = "loglog", threshold = "flexible")
summary(fit_age2)## formula: atelectasis_percent ~ agec
## data: data_prop
##
## link threshold nobs logLik AIC niter max.grad cond.H
## loglog flexible 236 -191.67 391.33 8(0) 1.69e-08 1.5e+02
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## agec -0.3971 0.5017 -0.792 0.429
##
## Threshold coefficients:
## Estimate Std. Error z value
## 0%|5% 0.7210 0.5067 1.423
## 5%|10% 2.0847 0.5415 3.850
## 10%|15% 2.5600 0.5691 4.498
## Tests of nominal effects
##
## formula: atelectasis_percent ~ agec
## Df logLik AIC LRT Pr(>Chi)
## <none> -191.67 391.33
## agec
## Tests of scale effects
##
## formula: atelectasis_percent ~ agec
## Df logLik AIC LRT Pr(>Chi)
## <none> -191.67 391.33
## agec 1 -191.64 393.28 0.052727 0.8184
Still not shown. Nonetheless, proportional odds assumption is not quite as relevant for covariates. Will assess remaining variables to decide best approach.
uni_age <- tidy(fit_age, conf.int = T, conf.type = "Wald") %>%
transmute(
term, across(c(estimate, conf.low, conf.high), exp)
) %>%
gt() %>%
fmt_number(c(estimate, conf.low, conf.high), decimals = 2) %>%
cols_merge(c(conf.low, conf.high),
pattern = "{1}—{2}") %>%
cols_label(conf.low = "95%CI")
uni_age| term | estimate | 95%CI |
|---|---|---|
| 0%|5% | 2.06 | 0.76—5.55 |
| 5%|10% | 8.04 | 2.78—23.24 |
| 10%|15% | 12.94 | 4.24—39.47 |
| age | 0.99 | 0.97—1.01 |
Sex
fit_sex <- clm(atelectasis_percent~sex,
data = data_prop,
link = "loglog", threshold = "flexible")
summary(fit_sex)## formula: atelectasis_percent ~ sex
## data: data_prop
##
## link threshold nobs logLik AIC niter max.grad cond.H
## loglog flexible 236 -191.03 390.06 8(0) 1.05e-08 1.6e+01
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## sexMan 0.5297 0.3602 1.471 0.141
##
## Threshold coefficients:
## Estimate Std. Error z value
## 0%|5% 1.1720 0.1329 8.817
## 5%|10% 2.5403 0.2356 10.784
## 10%|15% 3.0162 0.2937 10.271
## Tests of nominal effects
##
## formula: atelectasis_percent ~ sex
## Df logLik AIC LRT Pr(>Chi)
## <none> -191.03 390.06
## sex 2 -190.33 392.66 1.3989 0.4969
## Tests of scale effects
##
## formula: atelectasis_percent ~ sex
## Df logLik AIC LRT Pr(>Chi)
## <none> -191.03 390.06
## sex 1 -190.98 391.97 0.091001 0.7629
Sex OK.
uni_sex <- tidy(fit_sex, conf.int = T, conf.type = "Wald") %>%
transmute(
term, across(c(estimate, conf.low, conf.high), exp)
) %>%
gt() %>%
fmt_number(c(estimate, conf.low, conf.high), decimals = 2) %>%
cols_merge(c(conf.low, conf.high),
pattern = "{1}—{2}") %>%
cols_label(conf.low = "95%CI")
uni_sex| term | estimate | 95%CI |
|---|---|---|
| 0%|5% | 3.23 | 2.49—4.19 |
| 5%|10% | 12.68 | 7.99—20.12 |
| 10%|15% | 20.41 | 11.48—36.30 |
| sexMan | 1.70 | 0.84—3.44 |
Sleep Apnea
fit_OSA <- clm(atelectasis_percent~sleep_apnea,
data = data_prop,
link = "loglog", threshold = "flexible")
summary(fit_OSA)## formula: atelectasis_percent ~ sleep_apnea
## data: data_prop
##
## link threshold nobs logLik AIC niter max.grad cond.H
## loglog flexible 236 -181.28 370.56 8(0) 8.01e-11 1.6e+01
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## sleep_apneaYes 1.684 0.312 5.398 6.74e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## 0%|5% 1.2989 0.1390 9.342
## 5%|10% 2.7400 0.2460 11.138
## 10%|15% 3.2257 0.3029 10.649
## Tests of nominal effects
##
## formula: atelectasis_percent ~ sleep_apnea
## Df logLik AIC LRT Pr(>Chi)
## <none> -181.28 370.56
## sleep_apnea 2 -181.20 374.41 0.14765 0.9288
## Tests of scale effects
##
## formula: atelectasis_percent ~ sleep_apnea
## Df logLik AIC LRT Pr(>Chi)
## <none> -181.28 370.56
## sleep_apnea 1 -181.22 372.44 0.11567 0.7338
OSA OK.
uni_OSA <- tidy(fit_OSA, conf.int = T, conf.type = "Wald") %>%
transmute(
term, across(c(estimate, conf.low, conf.high), exp)
) %>%
gt() %>%
fmt_number(c(estimate, conf.low, conf.high), decimals = 2) %>%
cols_merge(c(conf.low, conf.high),
pattern = "{1}—{2}") %>%
cols_label(conf.low = "95%CI")
uni_OSA| term | estimate | 95%CI |
|---|---|---|
| 0%|5% | 3.67 | 2.79—4.81 |
| 5%|10% | 15.49 | 9.56—25.08 |
| 10%|15% | 25.17 | 13.90—45.58 |
| sleep_apneaYes | 5.39 | 2.92—9.93 |
This modelling approach with collapsed factors is way more stable. Thus, will proceed with this modelling approach.
fit_multi <- clm(atelectasis_percent ~ type_obesity + age + sex + sleep_apnea,
data = data_prop,
link = "loglog", threshold = "flexible")
summary(fit_multi)## formula: atelectasis_percent ~ type_obesity + age + sex + sleep_apnea
## data: data_prop
##
## link threshold nobs logLik AIC niter max.grad cond.H
## loglog flexible 236 -167.75 351.50 8(0) 8.07e-12 1.7e+05
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## type_obesity2 0.392239 0.542266 0.723 0.469475
## type_obesity3 1.632907 0.438320 3.725 0.000195 ***
## age -0.002986 0.013200 -0.226 0.821015
## sexMan -0.482583 0.429310 -1.124 0.260975
## sleep_apneaYes 1.697836 0.370437 4.583 4.58e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## 0%|5% 2.2347 0.6855 3.260
## 5%|10% 3.7402 0.7201 5.194
## 10%|15% 4.2325 0.7428 5.698
Convergence:
## nobs logLik niter max.grad cond.H logLik.Error
## 236 -167.75 8(0) 8.07e-12 1.7e+05 <1e-10
##
## Estimate Std.Err Gradient Error Cor.Dec Sig.Dig
## 0%|5% 2.234661 0.6855 -5.33e-15 -2.17e-15 14 15
## 5%|10% 3.740199 0.7201 8.02e-12 5.94e-15 13 14
## 10%|15% 4.232542 0.7428 -8.07e-12 -2.70e-13 12 13
## type_obesity2 0.392239 0.5423 2.94e-15 -1.28e-16 15 15
## type_obesity3 1.632907 0.4383 5.42e-14 -2.58e-15 14 15
## age -0.002986 0.0132 2.65e-12 -4.02e-17 16 14
## sexMan -0.482583 0.4293 1.62e-14 1.80e-15 14 14
## sleep_apneaYes 1.697836 0.3704 3.22e-14 -6.86e-15 13 14
##
## Eigen values of Hessian:
## 1.056e+05 7.537e+01 2.777e+01 1.806e+01 1.239e+01 4.400e+00 3.547e+00 6.047e-01
##
## Convergence message from clm:
## (0) successful convergence
## In addition: Absolute and relative convergence criteria were met
Check proportional odds assumption:
## Tests of nominal effects
##
## formula: atelectasis_percent ~ type_obesity + age + sex + sleep_apnea
## Df logLik AIC LRT Pr(>Chi)
## <none> -167.75 351.50
## type_obesity
## age
## sex 2 -167.03 354.05 1.44341 0.4859
## sleep_apnea 2 -167.64 355.28 0.21756 0.8969
Proportional odds OK for all variables except obesity type (as already expected) and age, but proportional odds assumption is not as relevant for covariates.
Check for scale effects:
## Tests of scale effects
##
## formula: atelectasis_percent ~ type_obesity + age + sex + sleep_apnea
## Df logLik AIC LRT Pr(>Chi)
## <none> -167.75 351.50
## type_obesity 2 -166.57 353.14 2.35958 0.3073
## age 1 -167.69 353.39 0.10523 0.7456
## sex 1 -167.45 352.90 0.59651 0.4399
## sleep_apnea 1 -167.75 353.50 0.00001 0.9973
No scale effects, good news.
Summarize results in table:
tidy(fit_multi, conf.int = T, conf.type = "Wald") %>%
transmute(
term, across(c(estimate, conf.low, conf.high), exp)
) %>%
gt() %>%
fmt_number(c(estimate, conf.low, conf.high), decimals = 2) %>%
cols_merge(c(conf.low, conf.high),
pattern = "{1}—{2}") %>%
cols_label(conf.low = "95%CI")| term | estimate | 95%CI |
|---|---|---|
| 0%|5% | 9.34 | 2.44—35.81 |
| 5%|10% | 42.11 | 10.26—172.72 |
| 10%|15% | 68.89 | 16.07—295.41 |
| type_obesity2 | 1.48 | 0.51—4.28 |
| type_obesity3 | 5.12 | 2.17—12.09 |
| age | 1.00 | 0.97—1.02 |
| sexMan | 0.62 | 0.27—1.43 |
| sleep_apneaYes | 5.46 | 2.64—11.29 |
As mentioned before, a partial proportional odds model could be useful in understanding patterns leading to not meeting proportional odds assumption. Thus, I will fit a partial proportional odds model.
The loglog function remains the better function to fit a model with nominal component for type_obesity.
data_prop %>%
nest(data = everything()) %>%
crossing(
link = c("logit", "probit", "cloglog", "loglog", "cauchit"),
threshold = c("flexible", "equidistant")
) %>%
mutate(
mod = map2(
data, link,
~clm(atelectasis_percent ~ sex + age + sleep_apnea, nominal = ~ type_obesity,
data = .x, link = .y
)
)
) %>%
mutate(
mod_summary = map(
mod,
~glance(.x) %>% mutate(logLik = as.numeric(logLik))
)
) %>%
unnest(mod_summary) %>%
dplyr::select(link, threshold, logLik, edf, AIC, BIC) %>%
arrange(logLik) %>%
gt()| link | threshold | logLik | edf | AIC | BIC |
|---|---|---|---|---|---|
| cauchit | equidistant | -164.5107 | 12 | 353.0215 | 394.5875 |
| cauchit | flexible | -164.5107 | 12 | 353.0215 | 394.5875 |
| cloglog | equidistant | -163.4546 | 12 | 350.9092 | 392.4752 |
| cloglog | flexible | -163.4546 | 12 | 350.9092 | 392.4752 |
| logit | equidistant | -162.2484 | 12 | 348.4968 | 390.0628 |
| logit | flexible | -162.2484 | 12 | 348.4968 | 390.0628 |
| probit | equidistant | -162.2038 | 12 | 348.4076 | 389.9736 |
| probit | flexible | -162.2038 | 12 | 348.4076 | 389.9736 |
| loglog | equidistant | -161.8385 | 12 | 347.6771 | 389.2431 |
| loglog | flexible | -161.8385 | 12 | 347.6771 | 389.2431 |
fit_BMIpartuni <- vglm(atelectasis_percent ~ type_obesity,
data=data_prop, link="loglog",
family=cumulative(parallel=FALSE~type_obesity, reverse = TRUE)
)
summary(fit_BMIpartuni)##
## Call:
## vglm(formula = atelectasis_percent ~ type_obesity, family = cumulative(parallel = FALSE ~
## type_obesity, reverse = TRUE), data = data_prop, link = "loglog")
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -2.2336 0.4296 -5.200 2.00e-07 ***
## (Intercept):2 -4.1109 1.0082 -4.078 4.55e-05 ***
## (Intercept):3 -4.1109 1.0082 -4.078 4.55e-05 ***
## type_obesity2:1 0.5064 0.5760 0.879 0.3793
## type_obesity2:2 0.8722 1.2394 0.704 0.4816
## type_obesity2:3 0.1596 1.4268 0.112 0.9109
## type_obesity3:1 1.9507 0.4672 4.176 2.97e-05 ***
## type_obesity3:2 2.2295 1.0433 2.137 0.0326 *
## type_obesity3:3 1.7039 1.0608 1.606 0.1082
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3]),
## logitlink(P[Y>=4])
##
## Residual deviance: 352.2895 on 699 degrees of freedom
##
## Log-likelihood: -176.1448 on 699 degrees of freedom
##
## Number of Fisher scoring iterations: 6
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):3'
##
##
## Exponentiated coefficients:
## type_obesity2:1 type_obesity2:2 type_obesity2:3 type_obesity3:1 type_obesity3:2
## 1.659259 2.392157 1.173077 7.033816 9.295238
## type_obesity3:3
## 5.495495
## 2.5 % 97.5 %
## (Intercept):1 0.05 0.25
## (Intercept):2 0.00 0.12
## (Intercept):3 0.00 0.12
## type_obesity2:1 0.54 5.13
## type_obesity2:2 0.21 27.15
## type_obesity2:3 0.07 19.22
## type_obesity3:1 2.82 17.57
## type_obesity3:2 1.20 71.83
## type_obesity3:3 0.69 43.95
Fit a model that allows proportional odds for all explanatory variables, except type obesity:
fit_BMIpart <- vglm(atelectasis_percent ~ type_obesity+sleep_apnea+sex+age,
data=data_prop,
link="loglog",
family=cumulative(parallel=FALSE~type_obesity, reverse = TRUE)
)
summary(fit_BMIpart)##
## Call:
## vglm(formula = atelectasis_percent ~ type_obesity + sleep_apnea +
## sex + age, family = cumulative(parallel = FALSE ~ type_obesity,
## reverse = TRUE), data = data_prop, link = "loglog")
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -2.185439 0.799026 -2.735 0.00624 **
## (Intercept):2 -4.197328 1.255720 -3.343 0.00083 ***
## (Intercept):3 -4.197329 1.255721 -3.343 0.00083 ***
## type_obesity2:1 0.524960 0.592935 0.885 0.37596
## type_obesity2:2 0.973580 1.283046 0.759 0.44797
## type_obesity2:3 0.231191 1.459054 0.158 0.87410
## type_obesity3:1 1.905298 0.486965 3.913 9.13e-05 ***
## type_obesity3:2 2.162276 1.105718 1.956 0.05052 .
## type_obesity3:3 1.610209 1.123131 1.434 0.15166
## sleep_apneaYes 2.074173 0.527271 3.934 8.36e-05 ***
## sexMan -0.387067 0.550704 -0.703 0.48214
## age -0.004762 0.015996 -0.298 0.76592
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3]),
## logitlink(P[Y>=4])
##
## Residual deviance: 335.3433 on 696 degrees of freedom
##
## Log-likelihood: -167.6716 on 696 degrees of freedom
##
## Number of Fisher scoring iterations: 15
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):2', '(Intercept):3'
##
##
## Exponentiated coefficients:
## type_obesity2:1 type_obesity2:2 type_obesity2:3 type_obesity3:1 type_obesity3:2
## 1.6903912 2.6474060 1.2601005 6.7214083 8.6908981
## type_obesity3:3 sleep_apneaYes sexMan age
## 5.0038569 7.9579648 0.6790456 0.9952492
## [1] 359.3433
## 2.5 % 97.5 %
## (Intercept):1 0.02 0.54
## (Intercept):2 0.00 0.18
## (Intercept):3 0.00 0.18
## type_obesity2:1 0.53 5.40
## type_obesity2:2 0.21 32.73
## type_obesity2:3 0.07 22.00
## type_obesity3:1 2.59 17.46
## type_obesity3:2 1.00 75.90
## type_obesity3:3 0.55 45.22
## sleep_apneaYes 2.83 22.37
## sexMan 0.23 2.00
## age 0.96 1.03
The SpO2 variable does not have a normal distribution. Furthermore, the distance between 1% increases in SpO2 cannot be considered equidistant increases since values are determined from the S-shaped curve of hemoglobin saturation. This is the reason why the distribution of SpO2 is negatively skewed, with upper values reaching the saturation point of the hemoglobin curve.
Therefore, modelling SpO2 as a linear term could be potentially misleading. Nonetheless, a model assuming a gaussian distribution for SpO2 may potentially be easier to understand and communicate.
Thus, I will first model SpO2 assuming a gaussian distribution and I will then apply a fractional regression model which is more appropriate for the distribution of this variable. If assuming a gaussian distribution does not change the conclusions with respect to the more appropriate fractional model, then I will report the earlier. If results are discordant, then I will report the fractional model, instead.
Create variable with atelectasis percent as numeric to be able to model with smooth term:
First, I will fit an empty model
model <- gam(spo2_VPO ~ 1,
data=datamodel
)
AIC_empty <- AIC(model)
R2_empty <- summary(model)$r.sq
dev_empty <- summary(model)$dev.expl
summary(model)##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ 1
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94.996 0.178 533.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## R-sq.(adj) = 0 Deviance explained = 0%
## GCV = 7.5084 Scale est. = 7.4766 n = 236
Fit again GAM with only smooth BMI term (the one used for Figure 1a):
model_BMI <- gam(spo2_VPO ~ s(BMI,k=5),
data=datamodel
)
AIC_BMI <- AIC(model_BMI)
R2_BMI <- summary(model_BMI)$r.sq
dev_BMI <- summary(model_BMI)$dev.expl
summary(model_BMI)##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(BMI, k = 5)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94.9958 0.1399 679.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 3.685 3.941 37.81 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.382 Deviance explained = 39.2%
## GCV = 4.7121 Scale est. = 4.6185 n = 236
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 6 iterations.
## The RMS GCV score gradient at convergence was 4.669889e-06 .
## The Hessian was positive definite.
## Model rank = 5 / 5
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(BMI) 4.00 3.69 1.1 0.90
Fit a model that only contains OSA:
model_OSA_only <- gam(spo2_VPO ~ sleep_apnea,
data=datamodel
)
AIC_OSA_only <- AIC(model_OSA_only)
R2_OSA_only <- summary(model_OSA_only)$r.sq
dev_OSA_only <- summary(model_OSA_only)$dev.expl
summary(model_OSA_only)##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ sleep_apnea
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 95.2661 0.1742 546.844 < 2e-16 ***
## sleep_apneaYes -3.5438 0.6308 -5.618 5.46e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## R-sq.(adj) = 0.115 Deviance explained = 11.9%
## GCV = 6.6727 Scale est. = 6.6162 n = 236
##
## Method: GCV Optimizer: magic
## Model required no smoothing parameter selectionModel rank = 2 / 2
Fit a model that only contains atelectasis percent as smooth term:
model_atel_smooth <- gam(spo2_VPO ~ s(atelectasis_smooth,k=4),
data=datamodel
)
AIC_atel_smooth <- AIC(model_atel_smooth)
R2_atel_smooth <- summary(model_atel_smooth)$r.sq
dev_atel_smooth <- summary(model_atel_smooth)$dev.expl
summary(model_atel_smooth)##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(atelectasis_smooth, k = 4)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94.996 0.101 940.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(atelectasis_smooth) 2.825 2.972 168.2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.678 Deviance explained = 68.2%
## GCV = 2.4451 Scale est. = 2.4055 n = 236
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 6 iterations.
## The RMS GCV score gradient at convergence was 5.121918e-06 .
## The Hessian was positive definite.
## Model rank = 4 / 4
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(atelectasis_smooth) 3.00 2.82 1.01 0.56
Compare against atelectasis_percent as categorical. Will not model as ordinal as coefficients are more difficult to interpret than nominal terms. Whether this variable is entered as nominal or ordinal does not change the results of models. This can be checked by converting atelectasis percent to ordered and back to unordered, which I did as corroboration.
datamodel <- datamodel %>% mutate(
atelectasis_percent = factor(atelectasis_percent, ordered = FALSE)
)model_atel_only <- gam(spo2_VPO ~ atelectasis_percent,
data=datamodel
)
AIC_atel_only <- AIC(model_atel_only)
R2_atel_only <- summary(model_atel_only)$r.sq
dev_atel_only <- summary(model_atel_only)$dev.expl
summary(model_atel_only)##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ atelectasis_percent
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 96.4780 0.1235 781.108 < 2e-16 ***
## atelectasis_percent2.5% -2.3871 0.4856 -4.916 1.69e-06 ***
## atelectasis_percent5% -3.9780 0.4342 -9.162 < 2e-16 ***
## atelectasis_percent7.5% -4.5083 0.2979 -15.132 < 2e-16 ***
## atelectasis_percent10% -5.4780 0.6477 -8.457 3.30e-15 ***
## atelectasis_percent12.5% -4.4780 1.5623 -2.866 0.00454 **
## atelectasis_percent15% -5.4780 0.7885 -6.948 3.85e-11 ***
## atelectasis_percent17.5% -7.4780 0.5643 -13.251 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## R-sq.(adj) = 0.676 Deviance explained = 68.5%
## GCV = 2.5108 Scale est. = 2.4257 n = 236
##
## Method: GCV Optimizer: magic
## Model required no smoothing parameter selectionModel rank = 8 / 8
Modelling with smooth term offers improvement in aR2 and drop in AIC. Nonetheless, since this a non-linear relationship between BMI and the outcome is being studied, the explained deviance is more informative. Since categorical atelectasis percent explains greater deviance, I will continue using it as categorical for the subsequent models.
Fit model sBMI plus atelectasis percentage:
model_atel <- gam(spo2_VPO ~ s(BMI,k=5) + s(atelectasis_smooth,k=5),
data=datamodel
)
AIC_atel <- AIC(model_atel)
R2_atel <- summary(model_atel)$r.sq
dev_atel <- summary(model_atel)$dev.expl
summary(model_atel)##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(BMI, k = 5) + s(atelectasis_smooth, k = 5)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94.99576 0.09935 956.2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 3.449 3.837 3.052 0.0322 *
## s(atelectasis_smooth) 2.783 3.251 70.797 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.688 Deviance explained = 69.7%
## GCV = 2.4031 Scale est. = 2.3294 n = 236
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 10 iterations.
## The RMS GCV score gradient at convergence was 2.779461e-07 .
## The Hessian was positive definite.
## Model rank = 9 / 9
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(BMI) 4.00 3.45 1.10 0.93
## s(atelectasis_smooth) 4.00 2.78 0.99 0.41
Model sleep apnea plus sBMI:
model_OSA <- gam(spo2_VPO ~ s(BMI,k=5) + sleep_apnea,
data=datamodel
)
AIC_OSA <- AIC(model_OSA)
R2_OSA <- summary(model_OSA)$r.sq
dev_OSA <- summary(model_OSA)$dev.expl
summary(model_OSA)##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(BMI, k = 5) + sleep_apnea
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 95.2065 0.1375 692.657 < 2e-16 ***
## sleep_apneaYes -2.7625 0.5085 -5.432 1.41e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 3.807 3.977 37.54 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.451 Deviance explained = 46.2%
## GCV = 4.2072 Scale est. = 4.1037 n = 236
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 5 iterations.
## The RMS GCV score gradient at convergence was 5.372761e-06 .
## The Hessian was positive definite.
## Model rank = 6 / 6
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(BMI) 4.00 3.81 1.14 0.97
Model sBMI + sleep apnea + atelectasis percent:
model_OSA_atel <- gam(spo2_VPO ~ s(BMI,k=5) +
sleep_apnea + s(atelectasis_smooth,k=5),
data=datamodel
)
AIC_OSA_atel <- AIC(model_OSA_atel)
R2_OSA_atel <- summary(model_OSA_atel)$r.sq
dev_OSA_atel <- summary(model_OSA_atel)$dev.expl
summary(model_OSA_atel)##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(BMI, k = 5) + sleep_apnea + s(atelectasis_smooth,
## k = 5)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 95.0447 0.1039 914.572 <2e-16 ***
## sleep_apneaYes -0.6418 0.4121 -1.557 0.121
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 3.528 3.879 3.085 0.026 *
## s(atelectasis_smooth) 2.750 3.217 56.814 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.69 Deviance explained = 70%
## GCV = 2.3998 Scale est. = 2.3156 n = 236
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 9 iterations.
## The RMS GCV score gradient at convergence was 4.264197e-07 .
## The Hessian was positive definite.
## Model rank = 10 / 10
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(BMI) 4.00 3.53 1.11 0.94
## s(atelectasis_smooth) 4.00 2.75 0.98 0.38
Fit model adjusted for confounders:
model_adj <- gam(spo2_VPO ~ s(BMI,k=5) +
sex + age + sleep_apnea + hb + altitude_cat,
data=datamodel, na.action=na.omit
)
AIC_adj <- AIC(model_adj)
R2_adj <- summary(model_adj)$r.sq
dev_adj <- summary(model_adj)$dev.expl
summary(model_adj)##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(BMI, k = 5) + sex + age + sleep_apnea + hb + altitude_cat
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 96.885653 1.710883 56.629 < 2e-16 ***
## sexMan 0.610780 0.503589 1.213 0.226
## age -0.006073 0.013711 -0.443 0.658
## sleep_apneaYes -2.903071 0.542254 -5.354 2.13e-07 ***
## hb -0.101724 0.114371 -0.889 0.375
## altitude_catModerate altitude -0.103037 0.396523 -0.260 0.795
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 3.773 3.969 37.22 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.448 Deviance explained = 46.8%
## GCV = 4.305 Scale est. = 4.1253 n = 234
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 6 iterations.
## The RMS GCV score gradient at convergence was 6.504339e-06 .
## The Hessian was positive definite.
## Model rank = 10 / 10
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(BMI) 4.00 3.77 1.12 0.98
Fit adjusted model plus atelectasis percentage:
model_plus <- gam(spo2_VPO ~ s(BMI,k=5) +
sex + age + sleep_apnea + hb + altitude_cat +
atelectasis_percent,
data=datamodel, na.action=na.omit
)
AIC_plus <- AIC(model_plus)
R2_plus <- summary(model_plus)$r.sq
dev_plus <- summary(model_plus)$dev.expl
summary(model_plus)##
## Family: gaussian
## Link function: identity
##
## Formula:
## spo2_VPO ~ s(BMI, k = 5) + sex + age + sleep_apnea + hb + altitude_cat +
## atelectasis_percent
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 96.295053 1.295969 74.304 < 2e-16 ***
## sexMan 0.295662 0.385893 0.766 0.44440
## age -0.002954 0.010466 -0.282 0.77803
## sleep_apneaYes -0.673227 0.443733 -1.517 0.13067
## hb 0.014394 0.087176 0.165 0.86901
## altitude_catModerate altitude -0.173961 0.302243 -0.576 0.56550
## atelectasis_percent2.5% -2.148467 0.494381 -4.346 2.13e-05 ***
## atelectasis_percent5% -3.784264 0.467908 -8.088 4.21e-14 ***
## atelectasis_percent7.5% -4.164107 0.365708 -11.386 < 2e-16 ***
## atelectasis_percent10% -5.029637 0.714830 -7.036 2.53e-11 ***
## atelectasis_percent12.5% -4.220356 1.582238 -2.667 0.00822 **
## atelectasis_percent15% -5.077611 0.817844 -6.209 2.67e-09 ***
## atelectasis_percent17.5% -6.053448 0.777222 -7.789 2.73e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 3.268 3.733 2.289 0.0844 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.686 Deviance explained = 70.6%
## GCV = 2.5213 Scale est. = 2.346 n = 234
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 4 iterations.
## The RMS GCV score gradient at convergence was 1.26394e-07 .
## The Hessian was positive definite.
## Model rank = 17 / 17
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(BMI) 4.00 3.27 1.06 0.80
Build a dataframe to compare models:
models <- data.frame(Model = c("empty",
"sBMI",
"OSA_only",
"atel_only",
"s(atel)",
"sBMI_atel",
"sBMI_OSA",
"sBMI_atel_OSA",
"adjusted",
"adjusted_plus_atel"),
AIC = c(AIC_empty,
AIC_BMI,
AIC_OSA_only,
AIC_atel_only,
AIC_atel_smooth,
AIC_atel,
AIC_OSA,
AIC_OSA_atel,
AIC_adj,
AIC_plus),
aR2 = c(R2_empty,
R2_BMI,
R2_OSA_only,
R2_atel_only,
R2_atel_smooth,
R2_atel,
R2_OSA,
R2_OSA_atel,
R2_adj,
R2_plus),
dev = c(dev_empty,
dev_BMI,
dev_OSA_only,
dev_atel_only,
dev_atel_smooth,
dev_atel,
dev_OSA,
dev_OSA_atel,
dev_adj,
dev_plus)
)Sort by AIC
## Model AIC aR2 dev
## 1 sBMI_atel_OSA 878.0318 0.6902833 0.6998746
## 2 sBMI_atel 878.4249 0.6884364 0.6966988
## 3 adjusted_plus_atel 881.2705 0.6858420 0.7064277
## 4 s(atel) 882.6807 0.6782654 0.6821328
## 5 atel_only 888.7213 0.6755647 0.6852288
## 6 adjusted 1007.2337 0.4475739 0.4683733
## 7 sBMI_OSA 1010.6786 0.4511281 0.4623563
## 8 sBMI 1037.4747 0.3822678 0.3919547
## 9 OSA_only 1119.6558 0.1150826 0.1188482
## 10 empty 1147.5158 0.0000000 0.0000000
Sort by deviance
models <- models %>% mutate(aR2 = round(aR2,3)*100,
dev = round(dev,3)*100
)
models %>% arrange(desc(dev))## Model AIC aR2 dev
## 1 adjusted_plus_atel 881.2705 68.6 70.6
## 2 sBMI_atel_OSA 878.0318 69.0 70.0
## 3 sBMI_atel 878.4249 68.8 69.7
## 4 atel_only 888.7213 67.6 68.5
## 5 s(atel) 882.6807 67.8 68.2
## 6 adjusted 1007.2337 44.8 46.8
## 7 sBMI_OSA 1010.6786 45.1 46.2
## 8 sBMI 1037.4747 38.2 39.2
## 9 OSA_only 1119.6558 11.5 11.9
## 10 empty 1147.5158 0.0 0.0
Sort by adjusted R2
## Model AIC aR2 dev
## 1 sBMI_atel_OSA 878.0318 69.0 70.0
## 2 sBMI_atel 878.4249 68.8 69.7
## 3 adjusted_plus_atel 881.2705 68.6 70.6
## 4 s(atel) 882.6807 67.8 68.2
## 5 atel_only 888.7213 67.6 68.5
## 6 sBMI_OSA 1010.6786 45.1 46.2
## 7 adjusted 1007.2337 44.8 46.8
## 8 sBMI 1037.4747 38.2 39.2
## 9 OSA_only 1119.6558 11.5 11.9
## 10 empty 1147.5158 0.0 0.0
Some conclusions from model assuming normal distribution for
SpO2:
- Most of the effect of BMI was atenuated when including atelectasis
percent. Nonetheless, a smooth term for BMI was still statistically
significant in adjusted models. Will check if these conclusions hold in
the more appropriate fractional model.
- There were problems with heteroskedasticity using Gaussian function,
with greater error higher SpO2 values. Notably, error term at lower SpO2
values was low, meaning that the terms included in the model very likely
explain most of the variability at low SpO2 values. This could make
sense, as it is unlikely that normal SpO2 values will be explained by
these factors. Thus, it may be a case of true heteroskedasticity. Will
again check in fractional model.
Convert SpO2 to fractional values between 0 and 1 to model.
First, I will fit an empty model
model<-gam(spo2_fraction~1,
data=datamodel,
family = quasibinomial(link = logit)
)
R2_empty <- summary(model)$r.sq
dev_empty <- summary(model)$dev.expl
summary(model)##
## Family: quasibinomial
## Link function: logit
##
## Formula:
## spo2_fraction ~ 1
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.94355 0.03744 78.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## R-sq.(adj) = 0 Deviance explained = -3.04e-13%
## GCV = 0.015586 Scale est. = 0.015728 n = 236
Fit again GAM with only smooth BMI term (the one used for Figure 1a):
model_BMI <- gam(spo2_fraction~s(BMI,k=5),
data=datamodel,
family = quasibinomial(link = logit)
)
R2_BMI <- summary(model_BMI)$r.sq
dev_BMI <- summary(model_BMI)$dev.expl
summary(model_BMI)##
## Family: quasibinomial
## Link function: logit
##
## Formula:
## spo2_fraction ~ s(BMI, k = 5)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.98307 0.03217 92.73 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 3.312 3.737 32.29 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.379 Deviance explained = 32.9%
## GCV = 0.010767 Scale est. = 0.010884 n = 236
##
## Method: GCV Optimizer: outer newton
## full convergence after 5 iterations.
## Gradient range [2.084401e-08,2.084401e-08]
## (score 0.01076717 & scale 0.01088391).
## Hessian positive definite, eigenvalue range [3.213419e-05,3.213419e-05].
## Model rank = 5 / 5
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(BMI) 4.00 3.31 1.09 0.88
Fit a model that only contains OSA:
model_OSA_only<-gam(spo2_fraction~sleep_apnea,
data=datamodel,
family = quasibinomial(link = logit)
)
R2_OSA_only <- summary(model_OSA_only)$r.sq
dev_OSA_only <- summary(model_OSA_only)$dev.expl
summary(model_OSA_only)##
## Family: quasibinomial
## Link function: logit
##
## Formula:
## spo2_fraction ~ sleep_apnea
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.00191 0.03833 78.326 < 2e-16 ***
## sleep_apneaYes -0.59672 0.10971 -5.439 1.34e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## R-sq.(adj) = 0.115 Deviance explained = 10.3%
## GCV = 0.014099 Scale est. = 0.014441 n = 236
##
## Method: GCV Optimizer: outer newton
## Model required no smoothing parameter selectionModel rank = 2 / 2
Fit a model that only contains atelectasis percent:
model_atel_only<-gam(spo2_fraction ~ atelectasis_percent,
data=datamodel,
family = quasibinomial(link = logit)
)
R2_atel_only <- summary(model_atel_only)$r.sq
dev_atel_only <- summary(model_atel_only)$dev.expl
summary(model_atel_only)##
## Family: quasibinomial
## Link function: logit
##
## Formula:
## spo2_fraction ~ atelectasis_percent
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.31028 0.03416 96.903 < 2e-16 ***
## atelectasis_percent2.5% -0.54251 0.10713 -5.064 8.45e-07 ***
## atelectasis_percent5% -0.79798 0.08751 -9.118 < 2e-16 ***
## atelectasis_percent7.5% -0.87205 0.06127 -14.233 < 2e-16 ***
## atelectasis_percent10% -0.99665 0.11831 -8.424 4.10e-15 ***
## atelectasis_percent12.5% -0.86794 0.29467 -2.945 0.00356 **
## atelectasis_percent15% -0.99665 0.14287 -6.976 3.26e-11 ***
## atelectasis_percent17.5% -1.21954 0.09601 -12.703 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## R-sq.(adj) = 0.676 Deviance explained = 62.6%
## GCV = 0.006189 Scale est. = 0.0063048 n = 236
##
## Method: GCV Optimizer: outer newton
## Model required no smoothing parameter selectionModel rank = 8 / 8
Fit model sBMI plus atelectasis percentage:
model_atel<-gam(spo2_fraction ~ s(BMI,k=5) + atelectasis_percent,
data=datamodel,
family = quasibinomial(link = logit)
)
R2_atel <- summary(model_atel)$r.sq
dev_atel <- summary(model_atel)$dev.expl
summary(model_atel)##
## Family: quasibinomial
## Link function: logit
##
## Formula:
## spo2_fraction ~ s(BMI, k = 5) + atelectasis_percent
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.29735 0.03566 92.459 < 2e-16 ***
## atelectasis_percent2.5% -0.54196 0.10717 -5.057 8.77e-07 ***
## atelectasis_percent5% -0.76229 0.09217 -8.271 1.13e-14 ***
## atelectasis_percent7.5% -0.83200 0.06932 -12.002 < 2e-16 ***
## atelectasis_percent10% -0.94286 0.12600 -7.483 1.59e-12 ***
## atelectasis_percent12.5% -0.80592 0.29893 -2.696 0.00754 **
## atelectasis_percent15% -0.95486 0.14686 -6.502 4.99e-10 ***
## atelectasis_percent17.5% -1.12631 0.12209 -9.225 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 1.005 1.01 1.553 0.215
##
## R-sq.(adj) = 0.678 Deviance explained = 62.9%
## GCV = 0.0061984 Scale est. = 0.0063098 n = 236
##
## Method: GCV Optimizer: outer newton
## full convergence after 6 iterations.
## Gradient range [-4.583064e-09,-4.583064e-09]
## (score 0.006198443 & scale 0.006309842).
## Hessian positive definite, eigenvalue range [5.485351e-09,5.485351e-09].
## Model rank = 12 / 12
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(BMI) 4.00 1.01 1.1 0.94
Model sleep apnea plus sBMI:
model_OSA <- gam(spo2_fraction ~ s(BMI,k=5) + sleep_apnea,
data=datamodel,
family = quasibinomial(link = logit)
)
R2_OSA <- summary(model_OSA)$r.sq
dev_OSA <- summary(model_OSA)$dev.expl
summary(model_OSA)##
## Family: quasibinomial
## Link function: logit
##
## Formula:
## spo2_fraction ~ s(BMI, k = 5) + sleep_apnea
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.02441 0.03253 92.982 < 2e-16 ***
## sleep_apneaYes -0.45014 0.09506 -4.735 3.82e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 3.557 3.883 30.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.425 Deviance explained = 38.8%
## GCV = 0.0099253 Scale est. = 0.010027 n = 236
Model sBMI + sleep apnea + atelectasis percent:
model_OSA_atel <-gam(spo2_fraction ~ s(BMI,k=5) + sleep_apnea +
atelectasis_percent,
data=datamodel,
family = quasibinomial(link = logit)
)
R2_OSA_atel <- summary(model_OSA_atel)$r.sq
dev_OSA_atel <- summary(model_OSA_atel)$dev.expl
summary(model_OSA_atel)##
## Family: quasibinomial
## Link function: logit
##
## Formula:
## spo2_fraction ~ s(BMI, k = 5) + sleep_apnea + atelectasis_percent
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.29754 0.03567 92.437 < 2e-16 ***
## sleep_apneaYes -0.07729 0.07824 -0.988 0.32424
## atelectasis_percent2.5% -0.52072 0.10945 -4.758 3.49e-06 ***
## atelectasis_percent5% -0.75059 0.09298 -8.073 4.07e-14 ***
## atelectasis_percent7.5% -0.81492 0.07150 -11.398 < 2e-16 ***
## atelectasis_percent10% -0.91640 0.12879 -7.115 1.46e-11 ***
## atelectasis_percent12.5% -0.80485 0.29904 -2.691 0.00765 **
## atelectasis_percent15% -0.93410 0.14857 -6.287 1.65e-09 ***
## atelectasis_percent17.5% -1.10427 0.12460 -8.863 2.37e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 1.001 1.002 1.63 0.203
##
## R-sq.(adj) = 0.679 Deviance explained = 63.1%
## GCV = 0.0062253 Scale est. = 0.0063143 n = 236
##
## Method: GCV Optimizer: outer newton
## full convergence after 6 iterations.
## Gradient range [-6.474023e-09,-6.474023e-09]
## (score 0.006225264 & scale 0.006314294).
## Hessian positive definite, eigenvalue range [6.511298e-09,6.511298e-09].
## Model rank = 13 / 13
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(BMI) 4 1 1.1 0.94
Fit model adjusted for confounders:
model_adj<-gam(spo2_fraction~s(BMI,k=5)+sex+age+sleep_apnea+hb+altitude_cat,
data=datamodel,
na.action=na.omit,
family = quasibinomial(link = logit)
)
R2_adj <- summary(model_adj)$r.sq
dev_adj <- summary(model_adj)$dev.expl
summary(model_adj)##
## Family: quasibinomial
## Link function: logit
##
## Formula:
## spo2_fraction ~ s(BMI, k = 5) + sex + age + sleep_apnea + hb +
## altitude_cat
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.38710 0.39225 8.635 1.1e-15 ***
## sexMan 0.12972 0.11249 1.153 0.250
## age -0.00121 0.00315 -0.384 0.701
## sleep_apneaYes -0.47773 0.10250 -4.661 5.4e-06 ***
## hb -0.02217 0.02645 -0.838 0.403
## altitude_catModerate altitude -0.02806 0.08869 -0.316 0.752
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 3.493 3.852 29.68 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.42 Deviance explained = 39.5%
## GCV = 0.010105 Scale est. = 0.010041 n = 234
##
## Method: GCV Optimizer: outer newton
## full convergence after 6 iterations.
## Gradient range [2.615768e-09,2.615768e-09]
## (score 0.01010485 & scale 0.01004117).
## Hessian positive definite, eigenvalue range [2.720599e-05,2.720599e-05].
## Model rank = 10 / 10
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(BMI) 4.00 3.49 1.1 0.93
Fit adjusted model plus atelectasis percentage:
model_plus<-gam(spo2_fraction ~ s(BMI,k=5) +
sex + age + sleep_apnea + hb + altitude_cat +
atelectasis_percent,
data=datamodel,
na.action=na.omit,
family = quasibinomial(link = logit)
)
R2_plus <- summary(model_plus)$r.sq
dev_plus <- summary(model_plus)$dev.expl
summary(model_plus)##
## Family: quasibinomial
## Link function: logit
##
## Formula:
## spo2_fraction ~ s(BMI, k = 5) + sex + age + sleep_apnea + hb +
## altitude_cat + atelectasis_percent
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.2635864 0.3054348 10.685 < 2e-16 ***
## sexMan 0.0488753 0.0900997 0.542 0.58805
## age -0.0008131 0.0025857 -0.314 0.75348
## sleep_apneaYes -0.0902632 0.0857037 -1.053 0.29341
## hb 0.0048886 0.0207568 0.236 0.81402
## altitude_catModerate altitude -0.0499119 0.0704348 -0.709 0.47931
## atelectasis_percent2.5% -0.5158425 0.1097937 -4.698 4.62e-06 ***
## atelectasis_percent5% -0.7556435 0.0940532 -8.034 5.71e-14 ***
## atelectasis_percent7.5% -0.8190971 0.0719975 -11.377 < 2e-16 ***
## atelectasis_percent10% -0.9343044 0.1303743 -7.166 1.15e-11 ***
## atelectasis_percent12.5% -0.8041808 0.3025669 -2.658 0.00844 **
## atelectasis_percent15% -0.9382125 0.1503699 -6.239 2.23e-09 ***
## atelectasis_percent17.5% -1.1090567 0.1276979 -8.685 8.69e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI) 1.157 1.298 1.225 0.316
##
## R-sq.(adj) = 0.68 Deviance explained = 63.8%
## GCV = 0.0062941 Scale est. = 0.0062863 n = 234
##
## Method: GCV Optimizer: outer newton
## full convergence after 3 iterations.
## Gradient range [-6.457525e-10,-6.457525e-10]
## (score 0.006294057 & scale 0.006286255).
## Hessian positive definite, eigenvalue range [9.039609e-07,9.039609e-07].
## Model rank = 17 / 17
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(BMI) 4.00 1.16 1.05 0.78
Build a dataframe to compare models:
models <- data.frame(Model = c("empty",
"sBMI",
"OSA_only",
"atel_only",
"sBMI_atel",
"sBMI_OSA",
"sBMI_atel_OSA",
"adjusted",
"adjusted_plus_atel"),
aR2 = c(R2_empty,
R2_BMI,
R2_OSA_only,
R2_atel_only,
R2_atel,
R2_OSA,
R2_OSA_atel,
R2_adj,
R2_plus),
dev = c(dev_empty,
dev_BMI,
dev_OSA_only,
dev_atel_only,
dev_atel,
dev_OSA,
dev_OSA_atel,
dev_adj,
dev_plus)
)Sort by deviance
models <- models %>% mutate(aR2 = round(aR2,3)*100,
dev = round(dev,3)*100
)
models %>% arrange(desc(dev))## Model aR2 dev
## 1 adjusted_plus_atel 68.0 63.8
## 2 sBMI_atel_OSA 67.9 63.1
## 3 sBMI_atel 67.8 62.9
## 4 atel_only 67.6 62.6
## 5 adjusted 42.0 39.5
## 6 sBMI_OSA 42.5 38.8
## 7 sBMI 37.9 32.9
## 8 OSA_only 11.5 10.3
## 9 empty 0.0 0.0
Sort by R2
## Model aR2 dev
## 1 adjusted_plus_atel 68.0 63.8
## 2 sBMI_atel_OSA 67.9 63.1
## 3 sBMI_atel 67.8 62.9
## 4 atel_only 67.6 62.6
## 5 sBMI_OSA 42.5 38.8
## 6 adjusted 42.0 39.5
## 7 sBMI 37.9 32.9
## 8 OSA_only 11.5 10.3
## 9 empty 0.0 0.0
Create table for models:
tab_BMI_only <- tbl_regression(model_BMI,
exponentiate = TRUE,
tidy_fun = gtsummary::tidy_gam)
tab_OSA_only <- tbl_regression(model_OSA_only,
exponentiate = TRUE,
tidy_fun = gtsummary::tidy_gam)
tab_atel_only <- tbl_regression(model_atel_only,
exponentiate = TRUE,
tidy_fun = gtsummary::tidy_gam)
tab_OSA <- tbl_regression(model_OSA,
exponentiate = TRUE,
tidy_fun = gtsummary::tidy_gam)
tab_atel <- tbl_regression(model_atel,
exponentiate = TRUE,
tidy_fun = gtsummary::tidy_gam)
tab_OSA_atel <- tbl_regression(model_OSA_atel,
exponentiate = TRUE,
tidy_fun = gtsummary::tidy_gam)
tab_adj <- tbl_regression(model_adj,
exponentiate = TRUE,
tidy_fun = gtsummary::tidy_gam)
tab_plus <- tbl_regression(model_plus,
exponentiate = TRUE,
tidy_fun = gtsummary::tidy_gam)
tableS3 <- tbl_stack(
list(tab_BMI_only,tab_OSA_only,tab_atel_only,tab_OSA,tab_atel,tab_OSA_atel,tab_adj,tab_plus),
group_header = c("BMI only", "OSA only", "Atelectasis percent only", "BMI + OSA", "BMI + Atelectasis percent", "BMI + OSA + Atelectasis percent", "BMI + OSA + age + sex + hb + altitude","Fully adjusted model")
)
tableS3| Characteristic | OR1 | 95% CI1 | p-value |
|---|---|---|---|
| BMI only | |||
| s(BMI) | <0.001 | ||
| OSA only | |||
| sleep_apnea | |||
| No | — | — | |
| Yes | 0.55 | 0.44, 0.68 | <0.001 |
| Atelectasis percent only | |||
| atelectasis_percent | |||
| 0% | — | — | |
| 2.5% | 0.58 | 0.47, 0.72 | <0.001 |
| 5% | 0.45 | 0.38, 0.53 | <0.001 |
| 7.5% | 0.42 | 0.37, 0.47 | <0.001 |
| 10% | 0.37 | 0.29, 0.47 | <0.001 |
| 12.5% | 0.42 | 0.24, 0.75 | 0.004 |
| 15% | 0.37 | 0.28, 0.49 | <0.001 |
| 17.5% | 0.30 | 0.24, 0.36 | <0.001 |
| BMI + OSA | |||
| sleep_apnea | |||
| No | — | — | |
| Yes | 0.64 | 0.53, 0.77 | <0.001 |
| s(BMI) | <0.001 | ||
| BMI + Atelectasis percent | |||
| atelectasis_percent | |||
| 0% | — | — | |
| 2.5% | 0.58 | 0.47, 0.72 | <0.001 |
| 5% | 0.47 | 0.39, 0.56 | <0.001 |
| 7.5% | 0.44 | 0.38, 0.50 | <0.001 |
| 10% | 0.39 | 0.30, 0.50 | <0.001 |
| 12.5% | 0.45 | 0.25, 0.80 | 0.008 |
| 15% | 0.38 | 0.29, 0.51 | <0.001 |
| 17.5% | 0.32 | 0.26, 0.41 | <0.001 |
| s(BMI) | 0.2 | ||
| BMI + OSA + Atelectasis percent | |||
| sleep_apnea | |||
| No | — | — | |
| Yes | 0.93 | 0.79, 1.08 | 0.3 |
| atelectasis_percent | |||
| 0% | — | — | |
| 2.5% | 0.59 | 0.48, 0.74 | <0.001 |
| 5% | 0.47 | 0.39, 0.57 | <0.001 |
| 7.5% | 0.44 | 0.38, 0.51 | <0.001 |
| 10% | 0.40 | 0.31, 0.51 | <0.001 |
| 12.5% | 0.45 | 0.25, 0.80 | 0.008 |
| 15% | 0.39 | 0.29, 0.53 | <0.001 |
| 17.5% | 0.33 | 0.26, 0.42 | <0.001 |
| s(BMI) | 0.2 | ||
| BMI + OSA + age + sex + hb + altitude | |||
| sex | |||
| Woman | — | — | |
| Man | 1.14 | 0.91, 1.42 | 0.3 |
| Age | 1.00 | 0.99, 1.00 | 0.7 |
| sleep_apnea | |||
| No | — | — | |
| Yes | 0.62 | 0.51, 0.76 | <0.001 |
| Hemoglobin | 0.98 | 0.93, 1.03 | 0.4 |
| altitude_cat | |||
| Low altitude | — | — | |
| Moderate altitude | 0.97 | 0.82, 1.16 | 0.8 |
| s(BMI) | <0.001 | ||
| Fully adjusted model | |||
| sex | |||
| Woman | — | — | |
| Man | 1.05 | 0.88, 1.25 | 0.6 |
| Age | 1.00 | 0.99, 1.00 | 0.8 |
| sleep_apnea | |||
| No | — | — | |
| Yes | 0.91 | 0.77, 1.08 | 0.3 |
| Hemoglobin | 1.00 | 0.96, 1.05 | 0.8 |
| altitude_cat | |||
| Low altitude | — | — | |
| Moderate altitude | 0.95 | 0.83, 1.09 | 0.5 |
| atelectasis_percent | |||
| 0% | — | — | |
| 2.5% | 0.60 | 0.48, 0.74 | <0.001 |
| 5% | 0.47 | 0.39, 0.56 | <0.001 |
| 7.5% | 0.44 | 0.38, 0.51 | <0.001 |
| 10% | 0.39 | 0.30, 0.51 | <0.001 |
| 12.5% | 0.45 | 0.25, 0.81 | 0.008 |
| 15% | 0.39 | 0.29, 0.53 | <0.001 |
| 17.5% | 0.33 | 0.26, 0.42 | <0.001 |
| s(BMI) | 0.3 | ||
| 1 OR = Odds Ratio, CI = Confidence Interval | |||
Save Table S3
Check residuals:
Now, plot take the inverse logit function to assess partial effect on mean SpO2.
Draw a personalized plot:
plot3a <- draw(model_BMI,
constant = coef(model_BMI)[1],
fun = inv_link(model_BMI),
smooth_col = "cadetblue4"
) +
scale_y_continuous(labels = scales::percent, limits=c(0.80,1)) +
labs(x="Body mass index (kg/m²)",
y = "Partial effect (mean SpO2)",
title = "s(BMI)",
subtitle=paste0("Deviance explained:"," ",(round(dev_BMI,3)*100),"%"),
tag="A",
caption=NULL) +
theme_bw() +
theme(panel.border = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.text.x = element_text(size=rel(1.2)),
axis.text.y = element_text(size=rel(1.2))
)
plot3a Check residuals:
Partial effect on mean SpO2:
plot3b <- draw(model_OSA,
constant = coef(model_OSA)[1],
fun = inv_link(model_OSA),
smooth_col = "cadetblue4"
) +
scale_y_continuous(labels = scales::percent, limits=c(0.80,1)) +
labs(x="Body mass index (kg/m²)",
y = "Partial effect (mean SpO2)",
title = "s(BMI) + OSA",
subtitle=paste0("Deviance explained:"," ",(round(dev_OSA,3)*100),"%"),
tag="B",
caption=NULL) +
theme_bw() +
theme(panel.border = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.text.x = element_text(size=rel(1.2)),
axis.text.y = element_text(size=rel(1.2))
)
plot3b Check residuals:
Partial effect on mean SpO2:
plot3c <- draw(model_atel,
constant = coef(model_atel)[1],
fun = inv_link(model_atel),
smooth_col = "cadetblue4"
) +
scale_y_continuous(labels = scales::percent, limits=c(0.80,1)) +
labs(x="Body mass index (kg/m²)",
y = "Partial effect (mean SpO2)",
title = "s(BMI) + atelectasis percent",
subtitle=paste0("Deviance explained:"," ",(round(dev_atel,3)*100),"%"),
tag="C",
caption=NULL) +
theme_bw() +
theme(panel.border = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.text.x = element_text(size=rel(1.2)),
axis.text.y = element_text(size=rel(1.2))
)
plot3c Check residuals:
Partial effect on mean SpO2:
plot3d <- draw(model_plus,
constant = coef(model_plus)[1],
fun = inv_link(model_plus),
smooth_col = "cadetblue4"
) +
scale_y_continuous(labels = scales::percent, limits=c(0.80,1)) +
labs(x="Body mass index (kg/m²)",
y = "Partial effect (mean SpO2)",
title = "Fully adjusted model",
subtitle=paste0("Deviance explained:"," ",(round(dev_plus,3)*100),"%"),
tag="D",
caption=NULL) +
theme_bw() +
theme(panel.border = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.text.x = element_text(size=rel(1.2)),
axis.text.y = element_text(size=rel(1.2))
)
plot3d In figure 2, it can be observed that SpO2 starts decreasing at BMIs
above 40. Thus, by having used the WHO obesity class categories, detail
on differences above BMI 40 for the extent of atelectasis percentage may
have been lost. The WHO obesity class categories do not reflect the
extent of variation in BMI observed in this sample of patients:
- Class 1, BMI [30,35): ~1/4 participants
- Class 2, BMI [35,40): ~1/4 participants - Class 3, BMI >40: ~1/2 of
participants, with a median BMI above a 5 units range: 45.56.
Thus, creating subcategories within the class 3 obesity may allow to assess the impact of BMI increases above 40 on atelectasis percentage with more detail.
Create new variable with the following categories:
- BMI [30,35)
- BMI [35,40)
- BMI [40,45)
- BMI [44,50)
- BMI >50
data_prop <- data_prop %>%
mutate(BMI_breaks = cut(BMI,
breaks=c(30,35,40,45,50,80),
right=FALSE,
labels=c("[30,35)","[35,40)","[40,45)","[45,50)","≥50")
)
)## BMI_breaks
## [30,35) [35,40) [40,45) [45,50) ≥50
## 63 53 57 31 32
## BMI_breaks
## [30,35) [35,40) [40,45) [45,50) ≥50
## 26.7 22.5 24.2 13.1 13.6
Mean expected frequency:
mean_exp <- data_prop %>% summarize(mean_expected_freq = n()/(nlevels(BMI_breaks)*nlevels(atelectasis)))
mean_exp## mean_expected_freq
## 1 23.6
## atelectasis
## BMI_breaks Yes No
## [30,35) 8 55
## [35,40) 15 38
## [40,45) 7 50
## [45,50) 15 16
## ≥50 32 0
## atelectasis
## BMI_breaks Yes No
## [30,35) 0.0339 0.2331
## [35,40) 0.0636 0.1610
## [40,45) 0.0297 0.2119
## [45,50) 0.0636 0.0678
## ≥50 0.1356 0.0000
Mosaic Plot
data_prop %>% mutate(atelectasis = fct_relevel(atelectasis, "No", "Yes")) %>%
ggplot() +
geom_mosaic(aes(x = product(atelectasis,BMI_breaks), fill=atelectasis),na.rm = TRUE) +
scale_fill_manual(values=c("grey95","lightsteelblue4")) +
theme_mosaic() +
theme(axis.text.x=element_text(size=rel(0.8)))##
## Pearson's Chi-squared test
##
## data: freq
## X-squared = 92.149, df = 4, p-value < 2.2e-16
Mean expected frequency:
mean_exp <- data_prop %>% drop_na(BMI_breaks, atelectasis_location) %>% summarize(mean_expected_freq = n()/(nlevels(BMI_breaks)*nlevels(atelectasis_location)))
mean_exp## mean_expected_freq
## 1 7.7
Mean expected frequency is greater than 5.0, so chi-squared without continuity correction is adequate.
## atelectasis_location
## BMI_breaks Unilateral Bilateral
## [30,35) 7 1
## [35,40) 10 5
## [40,45) 3 4
## [45,50) 11 4
## ≥50 22 10
## atelectasis_location
## BMI_breaks Unilateral Bilateral
## [30,35) 0.0909 0.0130
## [35,40) 0.1299 0.0649
## [40,45) 0.0390 0.0519
## [45,50) 0.1429 0.0519
## ≥50 0.2857 0.1299
Mosaic Plot
ggplot(data_prop = data_prop) +
geom_mosaic(aes(x = product(BMI_breaks,atelectasis_location), fill=BMI_breaks),na.rm = TRUE) +
scale_fill_manual(values=c("seagreen1","seagreen2","seagreen3","seagreen4","seagreen")) +
theme_mosaic() ##
## Pearson's Chi-squared test
##
## data: freq
## X-squared = 3.6755, df = 4, p-value = 0.4517
#Patients who had atelectasis (Yes/No)
atelectasis_total <- data_prop %>% group_by(atelectasis) %>%
summarise(n = n()) %>%
mutate(prev = n / sum(n)*100,
lower = lapply(n, prop.test, n = sum(n)),
upper = sapply(lower, function(x) x$conf.int[2])*100,
lower = sapply(lower, function(x) x$conf.int[1])*100,
BMI_breaks = "Total") %>%
mutate_at(3:5, round, 2)
atelectasis_total$confint <- paste(atelectasis_total$lower, "-", atelectasis_total$upper)
atelectasis_total <- atelectasis_total %>% dplyr::select(-c(lower,upper))
atelectasis_obesity <- data_prop %>% group_by(BMI_breaks, atelectasis) %>%
summarise(n = n()) %>%
mutate(prev = n / sum(n)*100,
lower = lapply(n, prop.test, n = sum(n)),
upper = sapply(lower, function(x) x$conf.int[2])*100,
lower = sapply(lower, function(x) x$conf.int[1])*100) %>%
mutate_at(4:6, round, 2)
atelectasis_obesity$confint <- paste(atelectasis_obesity$lower, "-", atelectasis_obesity$upper)
atelectasis_obesity <- atelectasis_obesity %>% dplyr::select(-c(lower,upper))
atelectasis <- atelectasis_total %>% bind_rows(atelectasis_obesity) %>% relocate(BMI_breaks, .before = n)
#Location of atelectasis (Unilateral/Bilateral)
atelectasis_total_location <- data_prop %>% filter(atelectasis=="Yes") %>% group_by(atelectasis_location) %>% summarise(Freq = n()) %>% mutate(Percentage = (round((Freq/sum(Freq)*100),digits=2)), BMI_breaks = "Total")
atelectasis_total_location$sumpercent <- paste(atelectasis_total_location$Freq," (", atelectasis_total_location$Percentage, "%)")
atelectasis_total_location <- atelectasis_total_location %>% dplyr::select(-c(Freq,Percentage)) %>% pivot_wider(names_from = atelectasis_location,values_from = sumpercent)
atelectasis_obesity_location <- data_prop %>% filter(atelectasis=="Yes") %>% group_by(BMI_breaks, atelectasis_location) %>% summarise(Freq = n()) %>% mutate(Percentage = (round((Freq/sum(Freq)*100),digits=2)))
atelectasis_obesity_location$sumpercent <- paste(atelectasis_obesity_location$Freq," (", atelectasis_obesity_location$Percentage, "%)")
atelectasis_obesity_location <- atelectasis_obesity_location %>% dplyr::select(-c(Freq,Percentage)) %>% pivot_wider(names_from = atelectasis_location,values_from = sumpercent)
location <- atelectasis_total_location %>% bind_rows(atelectasis_obesity_location)
atelectasis <- atelectasis %>% right_join(location,by="BMI_breaks") %>% mutate(confint=replace(confint, atelectasis=="No", NA), Unilateral=replace(Unilateral, atelectasis=="No", NA), Bilateral=replace(Bilateral, atelectasis=="No", NA))
atelectasis## # A tibble: 11 × 7
## atelectasis BMI_breaks n prev confint Unilateral Bilateral
## <fct> <chr> <int> <dbl> <chr> <chr> <chr>
## 1 Yes Total 77 32.6 26.77 - 39.06 53 ( 68.83 %) 24 ( 31.17 …
## 2 No Total 159 67.4 <NA> <NA> <NA>
## 3 Yes [30,35) 8 12.7 6.03 - 24.04 7 ( 87.5 %) 1 ( 12.5 %)
## 4 No [30,35) 55 87.3 <NA> <NA> <NA>
## 5 Yes [35,40) 15 28.3 17.2 - 42.56 10 ( 66.67 %) 5 ( 33.33 %)
## 6 No [35,40) 38 71.7 <NA> <NA> <NA>
## 7 Yes [40,45) 7 12.3 5.49 - 24.29 3 ( 42.86 %) 4 ( 57.14 %)
## 8 No [40,45) 50 87.7 <NA> <NA> <NA>
## 9 Yes [45,50) 15 48.4 30.56 - 66.6 11 ( 73.33 %) 4 ( 26.67 %)
## 10 No [45,50) 16 51.6 <NA> <NA> <NA>
## 11 Yes ≥50 32 100 86.66 - 100 22 ( 68.75 %) 10 ( 31.25 …
Mean atelectasis percentage coverage by BMI breaks:
If inoring zeo-inflation and skewness:
data %>% mutate(BMI_breaks = cut(BMI,breaks=c(30,35,40,45,50,80),right=FALSE,
labels=c("[30,35)","[35,40)","[40,45)","[45,50)","≥50")),
atelectasis_percent=as.numeric(atelectasis_percent)) %>%
group_by(BMI_breaks) %>%
summarize(
mean = mean(atelectasis_percent),
sd = sd(atelectasis_percent)
) ## # A tibble: 5 × 3
## BMI_breaks mean sd
## <fct> <dbl> <dbl>
## 1 [30,35) 0.913 2.89
## 2 [35,40) 1.56 3.15
## 3 [40,45) 0.702 2.05
## 4 [45,50) 3.63 4.22
## 5 ≥50 10.5 5.40
As is evident from these numbers, assuming normality causes standard deviation to capture negative values, which is impossible in reality for this variable.
Thus, bootstrapping mean and 95%CI is expected to lead to more appropriate estimates:
data_class_1 <- data %>%
mutate(BMI_breaks = cut(BMI,breaks=c(30,35,40,45,50,80),right=FALSE,
labels=c("[30,35)","[35,40)","[40,45)","[45,50)","≥50")),
atelectasis_percent=as.numeric(atelectasis_percent)) %>%
group_by(BMI_breaks) %>%
filter(BMI_breaks=="[40,45)")
data_class_2 <- data %>%
mutate(BMI_breaks = cut(BMI,breaks=c(30,35,40,45,50,80),right=FALSE,
labels=c("[30,35)","[35,40)","[40,45)","[45,50)","≥50")),
atelectasis_percent=as.numeric(atelectasis_percent)) %>%
group_by(BMI_breaks) %>%
filter(BMI_breaks=="[45,50)")
data_class_3 <- data %>%
mutate(BMI_breaks = cut(BMI,breaks=c(30,35,40,45,50,80),right=FALSE,
labels=c("[30,35)","[35,40)","[40,45)","[45,50)","≥50")),
atelectasis_percent=as.numeric(atelectasis_percent)) %>%
group_by(BMI_breaks) %>%
filter(BMI_breaks=="≥50") BMI [40,45)
boot_class1 <- one.boot(data_class_1$atelectasis_percent, mean, R=10000)
mean_boot_class1 <- mean(boot_class1$t)
mean_boot_class1## [1] 0.7016447
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 10000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_class1)
##
## Intervals :
## Level Normal Basic
## 95% ( 0.1738, 1.2300 ) ( 0.1316, 1.1842 )
##
## Level Percentile BCa
## 95% ( 0.2193, 1.2719 ) ( 0.2193, 1.2719 )
## Calculations and Intervals on Original Scale
BMI [44,50)
boot_class2 <- one.boot(data_class_2$atelectasis_percent, mean, R=10000)
mean_boot_class2 <- mean(boot_class2$t)
mean_boot_class2## [1] 3.620282
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 10000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_class2)
##
## Intervals :
## Level Normal Basic
## 95% ( 2.174, 5.102 ) ( 2.097, 5.081 )
##
## Level Percentile BCa
## 95% ( 2.177, 5.161 ) ( 2.177, 5.081 )
## Calculations and Intervals on Original Scale
boot_class3 <- one.boot(data_class_3$atelectasis_percent, mean, R=10000)
mean_boot_class3 <- mean(boot_class3$t)
mean_boot_class3## [1] 10.47598
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 10000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_class3)
##
## Intervals :
## Level Normal Basic
## 95% ( 8.62, 12.30 ) ( 8.52, 12.19 )
##
## Level Percentile BCa
## 95% ( 8.75, 12.42 ) ( 8.67, 12.34 )
## Calculations and Intervals on Original Scale
The mean atelectasis percentage according to class 3 subgroups was: BMI [40,45) (0.7%, 95%CI:0.22-1.27), BMI [44,50) (3.62%, 95%CI:2.18-5.08), and BMI >50 (10.48%, 95%CI:8.67-12.34).
Mean expected frequency:
mean_exp <- data_prop %>% mutate(atelectasis_percent=factor(atelectasis_percent)) %>% summarize(mean_expected_freq = n()/(nlevels(BMI_breaks)*nlevels(atelectasis_percent)))
mean_exp## mean_expected_freq
## 1 11.8
Mean expected frequency is greater than 5.0, so chi-squared without continuity correction is adequate.
## type_obesity
## atelectasis_percent 1 2 3
## 0% 56 45 69
## 5% 5 6 36
## 10% 0 1 6
## 15% 1 1 10
## type_obesity
## atelectasis_percent 1 2 3
## 0% 0.237288136 0.190677966 0.292372881
## 5% 0.021186441 0.025423729 0.152542373
## 10% 0.000000000 0.004237288 0.025423729
## 15% 0.004237288 0.004237288 0.042372881
## type_obesity
## atelectasis_percent 1 2 3
## 0% 0.90322581 0.84905660 0.57024793
## 5% 0.08064516 0.11320755 0.29752066
## 10% 0.00000000 0.01886792 0.04958678
## 15% 0.01612903 0.01886792 0.08264463
## BMI_breaks
## atelectasis_percent [30,35) [35,40) [40,45) [45,50) ≥50
## 0% 57 45 52 16 0
## 5% 5 6 5 12 19
## 10% 0 1 0 2 4
## 15% 1 1 0 1 9
## BMI_breaks
## atelectasis_percent [30,35) [35,40) [40,45) [45,50) ≥50
## 0% 0.241525424 0.190677966 0.220338983 0.067796610 0.000000000
## 5% 0.021186441 0.025423729 0.021186441 0.050847458 0.080508475
## 10% 0.000000000 0.004237288 0.000000000 0.008474576 0.016949153
## 15% 0.004237288 0.004237288 0.000000000 0.004237288 0.038135593
## BMI_breaks
## atelectasis_percent [30,35) [35,40) [40,45) [45,50) ≥50
## 0% 0.90476190 0.84905660 0.91228070 0.51612903 0.00000000
## 5% 0.07936508 0.11320755 0.08771930 0.38709677 0.59375000
## 10% 0.00000000 0.01886792 0.00000000 0.06451613 0.12500000
## 15% 0.01587302 0.01886792 0.00000000 0.03225806 0.28125000
##
## Pearson's Chi-squared test
##
## data: tab
## X-squared = 126.47, df = 12, p-value < 2.2e-16
The relative frequencies of atelectasis percentage according to BMI breaks were significantly different (Figure 2b, p<0.001).
barplot(prop_fig2b,beside=TRUE,ylim=c(0,1),xlim=c(0,34),ylab="Relative frequency",xlab="Body mass index (kg/m²)",
col=brewer.pal(4,"Blues"),
legend.text=c("0-5%","5-10%","10-15%","≥15%"),
space = c(0.2, 1.5)
)png(filename=paste(figfolder,"/Figure2b.png",sep=""),width=8, height=5, units="in", res=300)
Figure2b
dev.offI only want to show frequencies for the subcategories of class 3 obesity.
Rebuild plots and stack them:
layout(matrix(c(1,2), ncol=2), widths=c(5,5))
par(mgp=c(0,2,0))
barplot(prop_fig2a,beside=TRUE,ylim=c(0,1),yaxt='n',
main="A",adj=0,
names.arg=expression(atop("[30,35)","Class 1"),atop("[35,40)","Class 2"),atop("≥40","Class 3")),
col=brewer.pal(4,"Blues"),
space = c(0.2, 1.5),
cex.axis = 0.9, cex.names = 0.9
)
axis(2, at=0.5, pos=0, labels="Relative frequency", las=0, tck=0, lwd=0)
axis(2, at=c(0.0,0.2,0.4,0.6,0.8,1.0), labels=FALSE)
axis(2, at=c(0.0,0.2,0.4,0.6,0.8,1.0), labels=c("0.0","0.2","0.4","0.6","0.8","1.0"), pos=3, tck=0, lwd=0, cex.axis=0.9)
par(mgp=c(2,0.5,0))
barplot(prop_fig2b,beside=TRUE,ylim=c(0,1),
main="B",adj=0,
xlab=" Class 3 subgroups (kg/m²)", cex.lab = 0.9,
col=brewer.pal(4,"Blues"),
legend.text=c("0-5%","5-10%","10-15%","≥15%"), args.legend = c(y=1.1,cex=0.8),
space = c(0.2, 1.5),
cex.axis = 0.9, cex.names = 0.9
)dataprev <- data_prop %>% mutate(atelectasis = as.numeric(as.character(fct_recode(atelectasis,"0"="No","1"="Yes"))))
poisson_fit <- glm(atelectasis ~ BMI_breaks,
data = dataprev,
family = poisson(link = log))
tidy(poisson_fit, exponentiate = TRUE, conf.int = TRUE)## # A tibble: 5 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.127 0.354 -5.84 5.31e-9 0.0580 0.236
## 2 BMI_breaks[35,40) 2.23 0.438 1.83 6.72e-2 0.968 5.54
## 3 BMI_breaks[40,45) 0.967 0.518 -0.0646 9.48e-1 0.339 2.69
## 4 BMI_breaks[45,50) 3.81 0.438 3.06 2.25e-3 1.66 9.46
## 5 BMI_breaks≥50 7.87 0.395 5.22 1.78e-7 3.82 18.4
Robust standard errors:
## (Intercept) BMI_breaks[35,40) BMI_breaks[40,45)
## (Intercept) 0.109127 -0.1091270 -0.1091270
## BMI_breaks[35,40) -0.109127 0.1569257 0.1091270
## BMI_breaks[40,45) -0.109127 0.1091270 0.2344403
## BMI_breaks[45,50) -0.109127 0.1091270 0.1091270
## BMI_breaks≥50 -0.109127 0.1091270 0.1091270
## BMI_breaks[45,50) BMI_breaks≥50
## (Intercept) -0.1091270 -0.109127
## BMI_breaks[35,40) 0.1091270 0.109127
## BMI_breaks[40,45) 0.1091270 0.109127
## BMI_breaks[45,50) 0.1435356 0.109127
## BMI_breaks≥50 0.1091270 0.109127
#Calculate the standard error
se <- sqrt(diag(covmat))
# Bind together model output
# 1. exponentiated coefficients
# 2. robust standard errors
# 3. 95% confidence intervals
# Note that qnorm(0.975) approximately equals 1.96
model_output <- cbind(
Estimate = exp(coef(poisson_fit)),
`Robust SE` = se,
Lower = exp(coef(poisson_fit) - qnorm(0.975) * se),
Upper = exp(coef(poisson_fit) + qnorm(0.975) * se)
)
# Coerce model_output into a data frame
# Return second row to focus on the weekheart variable
model_output <- as.data.frame(round(model_output,2))
model_output## Estimate Robust SE Lower Upper
## (Intercept) 0.13 0.33 0.07 0.24
## BMI_breaks[35,40) 2.23 0.40 1.03 4.84
## BMI_breaks[40,45) 0.97 0.48 0.37 2.50
## BMI_breaks[45,50) 3.81 0.38 1.81 8.01
## BMI_breaks≥50 7.87 0.33 4.12 15.05
poisson_fit <- glm(atelectasis ~ BMI_breaks + age + sex + sleep_apnea,
data = dataprev,
family = poisson(link = log))
tidy(poisson_fit, exponentiate = TRUE, conf.int = TRUE)## # A tibble: 8 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.109 0.616 -3.60 0.000319 0.0310 0.349
## 2 BMI_breaks[35,40) 2.17 0.439 1.76 0.0779 0.940 5.39
## 3 BMI_breaks[40,45) 0.909 0.521 -0.184 0.854 0.317 2.55
## 4 BMI_breaks[45,50) 3.35 0.443 2.73 0.00633 1.44 8.39
## 5 BMI_breaks≥50 7.01 0.403 4.83 0.00000135 3.34 16.5
## 6 age 1.00 0.0120 0.211 0.833 0.979 1.03
## 7 sexMan 0.877 0.375 -0.351 0.725 0.399 1.75
## 8 sleep_apneaYes 2.73 0.307 3.27 0.00109 1.45 4.86
Robust standard errors:
## (Intercept) BMI_breaks[35,40) BMI_breaks[40,45)
## (Intercept) 0.177554148 -0.102924823 -0.0877845754
## BMI_breaks[35,40) -0.102924823 0.141303486 0.0976206338
## BMI_breaks[40,45) -0.087784575 0.097620634 0.1992085966
## BMI_breaks[45,50) -0.102133326 0.101540303 0.0976068644
## BMI_breaks≥50 -0.107113830 0.100485122 0.0952267231
## age -0.001925166 0.000117901 -0.0002658152
## sexMan -0.001046827 0.009748158 0.0035840805
## sleep_apneaYes 0.003033885 -0.014501059 0.0046586761
## BMI_breaks[45,50) BMI_breaks≥50 age sexMan
## (Intercept) -0.102133326 -0.1071138302 -1.925166e-03 -0.001046827
## BMI_breaks[35,40) 0.101540303 0.1004851223 1.179010e-04 0.009748158
## BMI_breaks[40,45) 0.097606864 0.0952267231 -2.658152e-04 0.003584080
## BMI_breaks[45,50) 0.134135903 0.1017412423 1.057530e-04 0.004249592
## BMI_breaks≥50 0.101741242 0.1047868860 1.934062e-04 0.001146054
## age 0.000105753 0.0001934062 4.696052e-05 -0.000138888
## sexMan 0.004249592 0.0011460540 -1.388880e-04 0.073608454
## sleep_apneaYes -0.022316208 -0.0021893547 -5.242813e-06 -0.023867442
## sleep_apneaYes
## (Intercept) 3.033885e-03
## BMI_breaks[35,40) -1.450106e-02
## BMI_breaks[40,45) 4.658676e-03
## BMI_breaks[45,50) -2.231621e-02
## BMI_breaks≥50 -2.189355e-03
## age -5.242813e-06
## sexMan -2.386744e-02
## sleep_apneaYes 4.567423e-02
#Calculate the standard error
se <- sqrt(diag(covmat))
# Bind together model output
# 1. exponentiated coefficients
# 2. robust standard errors
# 3. 95% confidence intervals
# Note that qnorm(0.975) approximately equals 1.96
model_output2 <- cbind(
Estimate = exp(coef(poisson_fit)),
`Robust SE` = se,
Lower = exp(coef(poisson_fit) - qnorm(0.975) * se),
Upper = exp(coef(poisson_fit) + qnorm(0.975) * se)
)
# Coerce model_output into a data frame
# Return second row to focus on the weekheart variable
model_output2 <- as.data.frame(round(model_output2,2))
model_output2## Estimate Robust SE Lower Upper
## (Intercept) 0.11 0.42 0.05 0.25
## BMI_breaks[35,40) 2.17 0.38 1.04 4.53
## BMI_breaks[40,45) 0.91 0.45 0.38 2.18
## BMI_breaks[45,50) 3.35 0.37 1.63 6.87
## BMI_breaks≥50 7.01 0.32 3.72 13.22
## age 1.00 0.01 0.99 1.02
## sexMan 0.88 0.27 0.52 1.49
## sleep_apneaYes 2.73 0.21 1.80 4.15
model_output <- model_output %>%
dplyr::slice(2:5) %>%
rename(PR = Estimate, SE = `Robust SE`) %>%
mutate("95%CI" = paste(Lower,Upper,sep="-")) %>%
select(-c(Lower,Upper))
model_output2 <- model_output2 %>%
dplyr::slice(2:5) %>%
rename(aPR = Estimate, aSE = `Robust SE`) %>%
mutate("a95%CI" = paste(Lower,Upper,sep="-")) %>%
select(-c(Lower,Upper))
table2 <- dplyr::bind_cols(model_output, model_output2) %>% rownames_to_column(var = "Category") %>% mutate_at("Category", str_replace, "BMI_breaks", "")
flextable(table2) %>%
save_as_docx(path = paste(tabfolder,"/Table2_complement.docx",sep=""))
table2## Category PR SE 95%CI aPR aSE a95%CI
## 1 [35,40) 2.23 0.40 1.03-4.84 2.17 0.38 1.04-4.53
## 2 [40,45) 0.97 0.48 0.37-2.5 0.91 0.45 0.38-2.18
## 3 [45,50) 3.81 0.38 1.81-8.01 3.35 0.37 1.63-6.87
## 4 ≥50 7.87 0.33 4.12-15.05 7.01 0.32 3.72-13.22
Collapse atelectasis percent category as done before:
dataposthoc <- data_prop
dataposthoc$atelectasis_percent <- fct_collapse(dataposthoc$atelectasis_percent,
"0%" = c("0%","2.5%"),
"5%" = c("5%","7.5%"),
"10%" = c("10%","12.5%"),
"15%" = c("15%","17.5%")
)
table(dataposthoc$atelectasis_percent)##
## 0% 5% 10% 15%
## 170 47 7 12
Rename new obesity category breaks levels to facilitate reading of results:
dataposthoc <- dataposthoc %>%
mutate(BMI_breaks=fct_recode(BMI_breaks,
"1"="[30,35)",
"2"="[35,40)",
"3"="[40,45)",
"4"="[45,50)",
"5"="≥50"),
atelectasis_percent=ordered(atelectasis_percent)) Visualize pattern of atelectasis percent increase by obesity type category.
dataposthoc %>%
ggplot(aes(x = BMI_breaks, fill = atelectasis_percent)) +
geom_bar() +
scale_fill_manual(values = brewer.pal(5,"Blues")) +
theme_minimal()The loglog link function will be used since this distribution provides the best fit for the fully adjusted ordinal model:
dataposthoc %>%
nest(data = everything()) %>%
crossing(
link = c("logit", "probit", "cloglog", "loglog", "cauchit"),
threshold = c("flexible", "equidistant")
) %>%
mutate(
mod = map2(
data, link,
~clm(atelectasis_percent ~ BMI_breaks + sex + age + sleep_apnea,
data = .x, link = .y
)
)
) %>%
mutate(
mod_summary = map(
mod,
~glance(.x) %>% mutate(logLik = as.numeric(logLik))
)
) %>%
unnest(mod_summary) %>%
dplyr::select(link, threshold, logLik, edf, AIC, BIC) %>%
arrange(logLik) %>%
gt()| link | threshold | logLik | edf | AIC | BIC |
|---|---|---|---|---|---|
| cloglog | equidistant | -136.2562 | 10 | 292.5125 | 327.1508 |
| cloglog | flexible | -136.2562 | 10 | 292.5125 | 327.1508 |
| probit | equidistant | -124.5685 | 10 | 269.1371 | 303.7754 |
| probit | flexible | -124.5685 | 10 | 269.1371 | 303.7754 |
| cauchit | equidistant | -121.7896 | 10 | 263.5792 | 298.2175 |
| cauchit | flexible | -121.7896 | 10 | 263.5792 | 298.2175 |
| logit | equidistant | -121.1829 | 10 | 262.3658 | 297.0041 |
| logit | flexible | -121.1829 | 10 | 262.3658 | 297.0041 |
| loglog | equidistant | -118.9089 | 10 | 257.8178 | 292.4561 |
| loglog | flexible | -118.9089 | 10 | 257.8178 | 292.4561 |
fit_uni <- clm(atelectasis_percent ~ BMI_breaks, data = dataposthoc, link = "loglog", threshold = "flexible")
summary(fit_uni)## formula: atelectasis_percent ~ BMI_breaks
## data: dataposthoc
##
## link threshold nobs logLik AIC niter max.grad cond.H
## loglog flexible 236 -130.96 275.92 7(0) 6.07e-07 8.2e+01
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## BMI_breaks2 0.49869 0.54030 0.923 0.356
## BMI_breaks3 -0.09443 0.60569 -0.156 0.876
## BMI_breaks4 1.89692 0.48495 3.912 9.17e-05 ***
## BMI_breaks5 3.86819 0.53167 7.276 3.45e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## 0%|5% 2.2993 0.4084 5.631
## 5%|10% 4.4187 0.5231 8.448
## 10%|15% 4.9582 0.5550 8.933
Note that AIC (275.92) improved very much respect to initial univariable model with conventional BMI categories (391.33).
## trying + BMI_breaks
## Tests of nominal effects
##
## formula: atelectasis_percent ~ BMI_breaks
## Df logLik AIC LRT Pr(>Chi)
## <none> -130.96 275.92
## BMI_breaks
Traceback of nominal_test function showed failure to converge (error code -1), similar to what happened with first model. Will fit model with nominal term and compare through ANOVA:
fit_BMInom <- clm(atelectasis_percent ~ 1, nominal= ~ BMI_breaks, data = dataposthoc, link = "loglog", threshold = "flexible")## Warning: (-1) Model failed to converge with max|grad| = 190.071 (tol = 1e-06)
## In addition: maximum number of consecutive Newton modifications reached
## formula: atelectasis_percent ~ 1
## nominal: ~BMI_breaks
## data: dataposthoc
##
## link threshold nobs logLik AIC niter max.grad cond.H
## loglog flexible 236 -136.78 303.55 4(0) 1.90e+02 -1.2e+20
##
## Threshold coefficients:
## Estimate Std. Error z value
## 0%|5%.(Intercept) 1.8814 NA NA
## 5%|10%.(Intercept) 3.4172 NA NA
## 10%|15%.(Intercept) 1.3263 NA NA
## 0%|5%.BMI_breaks2 -0.1783 NA NA
## 5%|10%.BMI_breaks2 -0.3933 NA NA
## 10%|15%.BMI_breaks2 2.2080 NA NA
## 0%|5%.BMI_breaks3 0.2068 NA NA
## 5%|10%.BMI_breaks3 1.2968 NA NA
## 10%|15%.BMI_breaks3 0.0000 NA NA
## 0%|5%.BMI_breaks4 -1.4306 NA NA
## 5%|10%.BMI_breaks4 -1.0649 NA NA
## 10%|15%.BMI_breaks4 1.9925 NA NA
## 0%|5%.BMI_breaks5 -3.5989 NA NA
## 5%|10%.BMI_breaks5 -2.6380 NA NA
## 10%|15%.BMI_breaks5 -0.5268 NA NA
## Likelihood ratio tests of cumulative link models:
##
## formula: nominal: link: threshold:
## fit_uni atelectasis_percent ~ BMI_breaks ~1 loglog flexible
## fit_BMInom atelectasis_percent ~ 1 ~BMI_breaks loglog flexible
##
## no.par AIC logLik LR.stat df Pr(>Chisq)
## fit_uni 7 275.92 -130.96
## fit_BMInom 15 303.55 -136.78 -11.629 8 1
Model with nominal term did not converge. Thus, I will only present ordinal model.
uni_BMI_breaks <- tidy(fit_uni, conf.int = T, conf.type = "Wald") %>%
transmute(
term, across(c(estimate, conf.low, conf.high), exp)
) %>%
gt() %>%
fmt_number(c(estimate, conf.low, conf.high), decimals = 2) %>%
cols_merge(c(conf.low, conf.high),
pattern = "{1}—{2}") %>%
cols_label(conf.low = "95%CI")
uni_BMI_breaks| term | estimate | 95%CI |
|---|---|---|
| 0%|5% | 9.97 | 4.48—22.19 |
| 5%|10% | 82.98 | 29.77—231.32 |
| 10%|15% | 142.34 | 47.96—422.43 |
| BMI_breaks2 | 1.65 | 0.57—4.75 |
| BMI_breaks3 | 0.91 | 0.28—2.98 |
| BMI_breaks4 | 6.67 | 2.58—17.24 |
| BMI_breaks5 | 47.86 | 16.88—135.67 |
fit_multi <- clm(atelectasis_percent ~ BMI_breaks + age + sex + sleep_apnea, data = dataposthoc, link = "loglog", threshold = "flexible")
summary(fit_multi)## formula: atelectasis_percent ~ BMI_breaks + age + sex + sleep_apnea
## data: dataposthoc
##
## link threshold nobs logLik AIC niter max.grad cond.H
## loglog flexible 236 -118.91 257.82 8(0) 5.95e-12 2.0e+05
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## BMI_breaks2 0.37446 0.54564 0.686 0.492540
## BMI_breaks3 -0.26260 0.61977 -0.424 0.671779
## BMI_breaks4 1.82476 0.50148 3.639 0.000274 ***
## BMI_breaks5 4.00462 0.55550 7.209 5.64e-13 ***
## age 0.01379 0.01500 0.920 0.357708
## sexMan -0.24020 0.46488 -0.517 0.605375
## sleep_apneaYes 2.04946 0.40905 5.010 5.44e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## 0%|5% 2.9807 0.7691 3.875
## 5%|10% 5.3624 0.8786 6.103
## 10%|15% 5.9524 0.9054 6.574
Convergence:
## nobs logLik niter max.grad cond.H logLik.Error
## 236 -118.91 8(0) 5.95e-12 2.0e+05 <1e-10
##
## Estimate Std.Err Gradient Error Cor.Dec Sig.Dig
## 0%|5% 2.98075 0.7691 -1.37e-13 5.91e-16 14 15
## 5%|10% 5.36244 0.8786 -3.22e-15 1.94e-16 15 16
## 10%|15% 5.95239 0.9054 3.33e-15 3.19e-16 15 16
## BMI_breaks2 0.37446 0.5456 -3.62e-15 -1.71e-15 14 14
## BMI_breaks3 -0.26260 0.6198 1.40e-13 2.72e-14 13 13
## BMI_breaks4 1.82476 0.5015 -4.44e-16 -1.25e-15 14 15
## BMI_breaks5 4.00462 0.5555 -6.83e-15 -1.64e-15 14 15
## age 0.01379 0.0150 5.95e-12 4.81e-17 16 15
## sexMan -0.24020 0.4649 3.45e-14 -7.36e-16 14 14
## sleep_apneaYes 2.04946 0.4091 3.64e-14 -4.38e-16 15 16
##
## Eigen values of Hessian:
## 8.306e+04 5.147e+01 2.511e+01 1.092e+01 1.037e+01 7.555e+00 5.777e+00 3.467e+00 1.965e+00 4.067e-01
##
## Convergence message from clm:
## (0) successful convergence
## In addition: Absolute and relative convergence criteria were met
Check proportional odds assumption:
## Tests of nominal effects
##
## formula: atelectasis_percent ~ BMI_breaks + age + sex + sleep_apnea
## Df logLik AIC LRT Pr(>Chi)
## <none> -118.91 257.82
## BMI_breaks
## age
## sex 2 -117.68 259.36 2.4613 0.2921
## sleep_apnea 2 -118.01 260.02 1.7984 0.4069
Proportional odds OK for all variables except obesity type (as already expected) and age. Centering of age should solve this issue:
Re-fit model:
fit_multi2 <- clm(atelectasis_percent ~ BMI_breaks + agec + sex + sleep_apnea, data = dataposthoc, link = "loglog", threshold = "flexible")
summary(fit_multi2)## formula: atelectasis_percent ~ BMI_breaks + agec + sex + sleep_apnea
## data: dataposthoc
##
## link threshold nobs logLik AIC niter max.grad cond.H
## loglog flexible 236 -118.91 257.82 8(0) 1.58e-13 2.5e+02
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## BMI_breaks2 0.3745 0.5456 0.686 0.492540
## BMI_breaks3 -0.2626 0.6198 -0.424 0.671779
## BMI_breaks4 1.8248 0.5015 3.639 0.000274 ***
## BMI_breaks5 4.0046 0.5555 7.209 5.64e-13 ***
## agec 0.5553 0.6037 0.920 0.357708
## sexMan -0.2402 0.4649 -0.517 0.605375
## sleep_apneaYes 2.0495 0.4091 5.010 5.44e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## 0%|5% 2.9807 0.7691 3.875
## 5%|10% 5.3624 0.8786 6.103
## 10%|15% 5.9524 0.9054 6.574
Convergence:
## nobs logLik niter max.grad cond.H logLik.Error
## 236 -118.91 8(0) 1.58e-13 2.5e+02 <1e-10
##
## Estimate Std.Err Gradient Error Cor.Dec Sig.Dig
## 0%|5% 2.9807 0.7691 -1.56e-13 -6.22e-15 13 14
## 5%|10% 5.3624 0.8786 1.84e-14 -6.57e-15 13 14
## 10%|15% 5.9524 0.9054 -2.29e-14 -7.65e-15 13 14
## BMI_breaks2 0.3745 0.5456 0.00e+00 -3.27e-15 14 14
## BMI_breaks3 -0.2626 0.6198 1.39e-13 2.48e-14 13 13
## BMI_breaks4 1.8248 0.5015 -4.44e-16 -3.53e-15 14 15
## BMI_breaks5 4.0046 0.5555 1.89e-15 -3.85e-15 14 15
## agec 0.5553 0.6037 1.58e-13 -2.65e-15 14 14
## sexMan -0.2402 0.4649 3.90e-14 2.04e-18 17 17
## sleep_apneaYes 2.0495 0.4091 3.94e-14 -9.18e-16 14 15
##
## Eigen values of Hessian:
## 93.2384 51.4260 24.1693 10.9191 10.1386 6.9042 4.8502 3.4383 1.6303 0.3765
##
## Convergence message from clm:
## (0) successful convergence
## In addition: Absolute and relative convergence criteria were met
Check proportional odds assumption:
## Tests of nominal effects
##
## formula: atelectasis_percent ~ BMI_breaks + agec + sex + sleep_apnea
## Df logLik AIC LRT Pr(>Chi)
## <none> -118.91 257.82
## BMI_breaks
## agec
## sex 2 -117.68 259.36 2.4613 0.2921
## sleep_apnea 2 -118.01 260.02 1.7984 0.4069
Issue not solved. Will report uncentered age as proportional odds assumption is not as relevant for covariates.
Check for scale effects:
## Tests of scale effects
##
## formula: atelectasis_percent ~ BMI_breaks + agec + sex + sleep_apnea
## Df logLik AIC LRT Pr(>Chi)
## <none> -118.91 257.82
## BMI_breaks 4 -115.04 258.08 7.7382 0.1017
## agec 1 -118.82 259.64 0.1754 0.6754
## sex 1 -118.13 258.26 1.5572 0.2121
## sleep_apnea 1 -118.77 259.53 0.2877 0.5917
No scale effects.
Summarize results of first model in table:
tidy(fit_multi, conf.int = T, conf.type = "Wald") %>%
transmute(
term, across(c(estimate, conf.low, conf.high), exp)
) %>%
gt() %>%
fmt_number(c(estimate, conf.low, conf.high), decimals = 2) %>%
cols_merge(c(conf.low, conf.high),
pattern = "{1}—{2}") %>%
cols_label(conf.low = "95%CI")| term | estimate | 95%CI |
|---|---|---|
| 0%|5% | 19.70 | 4.36—88.96 |
| 5%|10% | 213.24 | 38.11—1,193.36 |
| 10%|15% | 384.67 | 65.22—2,268.68 |
| BMI_breaks2 | 1.45 | 0.50—4.24 |
| BMI_breaks3 | 0.77 | 0.23—2.59 |
| BMI_breaks4 | 6.20 | 2.32—16.57 |
| BMI_breaks5 | 54.85 | 18.46—162.94 |
| age | 1.01 | 0.98—1.04 |
| sexMan | 0.79 | 0.32—1.96 |
| sleep_apneaYes | 7.76 | 3.48—17.31 |
The probit link function will be used since this distribution provides the best fit for the fully adjusted model with nominal BMI terms:
dataposthoc %>%
nest(data = everything()) %>%
crossing(
link = c("logit", "probit", "cloglog", "loglog", "cauchit"),
threshold = c("flexible", "equidistant")
) %>%
mutate(
mod = map2(
data, link,
~clm(atelectasis_percent ~ sex + age + sleep_apnea, nominal = ~ BMI_breaks,
data = .x, link = .y
)
)
) %>%
mutate(
mod_summary = map(
mod,
~glance(.x) %>% mutate(logLik = as.numeric(logLik))
)
) %>%
unnest(mod_summary) %>%
dplyr::select(link, threshold, logLik, edf, AIC, BIC) %>%
arrange(logLik) %>%
gt()| link | threshold | logLik | edf | AIC | BIC |
|---|---|---|---|---|---|
| cauchit | equidistant | -122.5310 | 18 | 281.0619 | 343.4109 |
| cauchit | flexible | -122.5310 | 18 | 281.0619 | 343.4109 |
| loglog | equidistant | -118.1566 | 18 | 272.3132 | 334.6622 |
| loglog | flexible | -118.1566 | 18 | 272.3132 | 334.6622 |
| logit | equidistant | -116.7621 | 18 | 269.5241 | 331.8731 |
| logit | flexible | -116.7621 | 18 | 269.5241 | 331.8731 |
| cloglog | equidistant | -115.4116 | 18 | 266.8231 | 329.1721 |
| cloglog | flexible | -115.4116 | 18 | 266.8231 | 329.1721 |
| probit | equidistant | -112.8364 | 18 | 261.6729 | 324.0218 |
| probit | flexible | -112.8364 | 18 | 261.6729 | 324.0218 |
Fit a model that allows proportional odds for all explanatory variables, except BMI breaks:
fit_BMIpart <- vglm(atelectasis_percent ~ BMI_breaks+sleep_apnea+sex+agec, data=dataposthoc, link="probit", family=cumulative(parallel=FALSE~BMI_breaks, reverse = TRUE))
summary(fit_BMIpart)##
## Call:
## vglm(formula = atelectasis_percent ~ BMI_breaks + sleep_apnea +
## sex + agec, family = cumulative(parallel = FALSE ~ BMI_breaks,
## reverse = TRUE), data = dataposthoc, link = "probit")
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -3.1556 0.9404 -3.356 0.000792 ***
## (Intercept):2 -5.2392 1.3672 -3.832 0.000127 ***
## (Intercept):3 -5.2392 1.3672 -3.832 0.000127 ***
## BMI_breaks2:1 0.5447 0.6074 0.897 0.369884
## BMI_breaks2:2 0.9595 1.3094 0.733 0.463684
## BMI_breaks2:3 0.1719 1.4878 0.116 0.908037
## BMI_breaks3:1 -0.3682 0.7025 -0.524 0.600222
## BMI_breaks3:2 -15.8638 1886.9705 NA NA
## BMI_breaks3:3 -16.2682 2309.8837 NA NA
## BMI_breaks4:1 2.1480 0.5975 3.595 0.000325 ***
## BMI_breaks4:2 1.4814 1.2966 1.142 0.253253
## BMI_breaks4:3 0.2223 1.5445 0.144 0.885558
## BMI_breaks5:1 19.8276 816.4253 NA NA
## BMI_breaks5:2 3.9944 1.1612 3.440 0.000582 ***
## BMI_breaks5:3 3.3507 1.1756 2.850 0.004370 **
## sleep_apneaYes 2.6305 0.5908 4.452 8.49e-06 ***
## sexMan 0.0059 0.6166 0.010 0.992365
## agec 0.6393 0.7689 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3]),
## logitlink(P[Y>=4])
##
## Residual deviance: 231.5156 on 690 degrees of freedom
##
## Log-likelihood: -115.7578 on 690 degrees of freedom
##
## Number of Fisher scoring iterations: 29
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):2', '(Intercept):3', 'BMI_breaks3:2', 'BMI_breaks3:3', 'BMI_breaks5:1', 'sleep_apneaYes', 'agec'
##
##
## Exponentiated coefficients:
## BMI_breaks2:1 BMI_breaks2:2 BMI_breaks2:3 BMI_breaks3:1 BMI_breaks3:2
## 1.724076e+00 2.610434e+00 1.187509e+00 6.919932e-01 1.289564e-07
## BMI_breaks3:3 BMI_breaks4:1 BMI_breaks4:2 BMI_breaks4:3 BMI_breaks5:1
## 8.605832e-08 8.567477e+00 4.398903e+00 1.248940e+00 4.083510e+08
## BMI_breaks5:2 BMI_breaks5:3 sleep_apneaYes sexMan agec
## 5.429348e+01 2.852206e+01 1.388102e+01 1.005917e+00 1.895157e+00
## 2.5 % 97.5 %
## (Intercept):1 0.01 0.27
## (Intercept):2 0.00 0.08
## (Intercept):3 0.00 0.08
## BMI_breaks2:1 0.52 5.67
## BMI_breaks2:2 0.20 33.98
## BMI_breaks2:3 0.06 21.93
## BMI_breaks3:1 0.17 2.74
## BMI_breaks3:2 0.00 Inf
## BMI_breaks3:3 0.00 Inf
## BMI_breaks4:1 2.66 27.64
## BMI_breaks4:2 0.35 55.85
## BMI_breaks4:3 0.06 25.78
## BMI_breaks5:1 0.00 Inf
## BMI_breaks5:2 5.58 528.66
## BMI_breaks5:3 2.85 285.67
## sleep_apneaYes 4.36 44.19
## sexMan 0.30 3.37
## agec 0.42 8.55
Hauck-Donner effect leading to unreliable estumates. Thus, will not report this model.